/************************************************************************/
/*                                                                      */
/*      RETable -- Efficient language for implementing regexp searches  */
/*                                                                      */
/*      This module converts a regular expression description           */
/*      from a compact form (as created by RegExp) to a                 */
/*      high-performance table-driven form (as described by             */
/*      MatchEng).  Several optimisations are available that            */
/*      can significantly improve search performance.  These            */
/*      optimisations fall into two categories: some try to             */
/*      minimise backtracking initiated by iterative states             */
/*      by analysing the state table following the iterative            */
/*      state; others use string or byte searches to improve            */
/*      the speed of scanning the text for possible matches.            */
/*                                                                      */
/*      Copyright (C) 1995-2000 Grouse Software.  All rights reserved.  */
/*      Written for Grouse by behoffski (Brenton Hoff).                 */
/*                                                                      */
/*      Free software: no warranty; use anywhere is ok; spread the      */
/*      sources; note any mods; share variations and derivatives        */
/*      (including sending to behoffski@grouse.com.au).                 */
/*                                                                      */
/************************************************************************/

#include "ascii.h"
#include <compdef.h>
#include <ctype.h>
#include "matcheng.h"
#include "regexp00.h"
#include "retable.h"
#include <stdlib.h>
#include <stdio.h>

/*The curse of a simpleminded NFA engine: backtracking stacks may be huge*/
#define BACKTRACKING_STACK_SIZE_MAX     65536uL

/*We need to allocate MORE stack if FastFile presents an even bigger buffer*/

#define IS_WORD(ch)  (isalnum(ch) || ((ch) == '_'))

#define ROUND_PARAGRAPH(x)              (((x) + 15) & ~0x0fuL)

/*Module-wide variables*/

typedef struct {
        /*Additional state table(s) to handle special cases*/

        /*Increment line count after attempting matches triggered by LF*/
        BOOL IncLineTableInitialised;
        MatchEng_Action IncLineTable[MATCHENG_TABLE_SIZE];

} RETABLE_MODULE_CONTEXT;

module_scope RETABLE_MODULE_CONTEXT gRETable;

/************************************************************************/
/*                                                                      */
/*      Init -- Prepare module for operation                            */
/*                                                                      */
/************************************************************************/
public_scope void
RETable_Init(void)
{
        /*No line-count adjustment table set up by default*/
        gRETable.IncLineTableInitialised = FALSE;

} /*Init*/


/************************************************************************/
/*                                                                      */
/*      Start -- Begin managing what has to be managed                  */
/*                                                                      */
/************************************************************************/
public_scope BOOL
RETable_Start(void)
{
        /*No preparation needed*/
        return TRUE;

} /*Start*/


/************************************************************************/
/*                                                                      */
/*      SetState -- Write actions and flags for state                   */
/*                                                                      */
/*      Used to initialise state tables and flags with constants.       */
/*                                                                      */
/************************************************************************/
module_scope void
RETable_SetState(MatchEng_Spec *pSpec, UINT StateNr, 
                 MatchEng_Action Action, MatchEng_TableFlags Flags)
{
        MatchEng_Action *pAct;
        MatchEng_Action *pActEnd;

        /*Initialise table entries*/
        pAct = &pSpec->pTables[StateNr * MATCHENG_TABLE_SIZE];
        pActEnd = pAct + MATCHENG_TABLE_SIZE;
        while (pAct != pActEnd) {
                *pAct++ = Action;
        }

        /*Also write table flags*/
        pSpec->pTableFlags[StateNr] = Flags;

} /*SetState*/



/************************************************************************/
/*                                                                      */
/*      AllocBacktrack -- (Re-)Allocate space for backtracking          */
/*                                                                      */
/*      The backtracking stack must have two entries per buffer byte    */
/*      (plus a couple of overhead entries) to handle the worst-case    */
/*      search request.  This function frees the previous stack         */
/*      (if allocated), then allocates a stack large enough to          */
/*      handle the specified size.  The memory consumption of this      */
/*      approach is horrifically large, but it's the best we can        */
/*      do until ggrep is rewritten to use DFAs.                        */
/*                                                                      */
/************************************************************************/
public_scope BOOL
RETable_AllocBacktrack(MatchEng_Spec *pSpec, UINT32 TextSize)
{
        UINT32 EntryPairsNeeded;

        /*Did we have a previous buffer?*/
        if (pSpec->pBacktrackMem != NULL) {
                /*Yes, free it now*/
                free(pSpec->pBacktrackMem);
        }

        /*Work out space needed and malloc it*/
        EntryPairsNeeded = TextSize + 4;
        pSpec->BacktrackSize = TextSize;
        pSpec->pBacktrackMem = malloc(EntryPairsNeeded * 2 * 
                                      sizeof(MatchEng_Action));
        if (pSpec->pBacktrackMem == NULL) {
                return FALSE;
        }

        /*Allocated memory successfully*/
        return TRUE;

} /*AllocBacktrack*/


/************************************************************************/
/*                                                                      */
/*      Create -- Acquire resources and initialise memory               */
/*                                                                      */
/*      Allocates memory, acquires any other needed resources,          */
/*      and initialises the table-driven RE search spec to a            */
/*      "blank" state.  Initialisation includes preparing the           */
/*      initial failure (backtracking underflow) state, and             */
/*      writing defaults for the remaining states.                      */
/*                                                                      */
/*      Returns FALSE if unable to obtain the required resources.       */
/*                                                                      */
/************************************************************************/
module_scope BOOL
RETable_Create(RegExp_Specification *pRE, MatchEng_Spec **ppSpec)
{
        UINT NrStates;
        BYTE *pMemory;
        MatchEng_Spec *pSpec;
        MatchEng_Action *pAct;
        MatchEng_Action *pActEnd;
        UINT SpecSize;
        UINT i;

        /*Adjust state count to include fail, start and success states*/
        NrStates = pRE->NrStates + 2;

        /*Acquire space for spec, tables, flags, endmarker actions etc*/
        /*All sizes are rounded up to the next paragraph (16-byte) boundary*/
        SpecSize =  ROUND_PARAGRAPH(sizeof(MatchEng_Spec));
        SpecSize += ROUND_PARAGRAPH(NrStates * (MATCHENG_TABLE_SIZE * 
                                                sizeof(MatchEng_Action)));
        SpecSize += ROUND_PARAGRAPH(NrStates * sizeof(MatchEng_TableFlags));
        SpecSize += ROUND_PARAGRAPH(NrStates * sizeof(MatchEng_Action));
        SpecSize += ROUND_PARAGRAPH(NrStates * sizeof(MatchEng_Action));

        pMemory = (BYTE *) malloc(SpecSize);
        if (pMemory == NULL) {
                /*Sorry, unable to acquire memory*/
                return FALSE;
        }

        /*Set up specification and pointers into working memory*/
        pSpec = (MatchEng_Spec *) pMemory;

        pMemory += ROUND_PARAGRAPH(sizeof(*pSpec));

        /*Create pointers into various sections of RAM*/
        pSpec->pEndmarkerActions  = (MatchEng_Action *) pMemory;
        pMemory += ROUND_PARAGRAPH(NrStates * sizeof(MatchEng_Action *));
        pSpec->pNotEndmarkerActions = (MatchEng_Action *) pMemory;
        pMemory += ROUND_PARAGRAPH(NrStates * sizeof(MatchEng_Action *));
        pSpec->pTables            = (MatchEng_Action *) pMemory;
        pMemory += ROUND_PARAGRAPH(NrStates * sizeof(MatchEng_Action *) *
                                   MATCHENG_TABLE_SIZE);
        pSpec->pTableFlags        = (MatchEng_TableFlags *) pMemory;

        /*Horrible hack: preallocate backtracking stack memory now*/
        if (! RETable_AllocBacktrack(pSpec, BACKTRACKING_STACK_SIZE_MAX)) {
                /*Sorry, unable to acquire backtrack stack memory*/
                free(pSpec);
                return FALSE;
        }

        /*Backtracking stack works from end of spec memory backwards*/
        pSpec->pStack = (MatchEng_Action *) pSpec->pBacktrackMem;
        pSpec->pStack += pSpec->BacktrackSize - 1;

        /*Default to starting on first table of RE (start table)*/
        pSpec->StartTableNr = 0;

        /*First table contains failure state to simplify backtracking*/
        RETable_SetState(pSpec, 0, ABANDON, MATCHENG_TABLE_TYPE_FAIL);

        /*?? Backtracking underflow should not happen with buffer search?*/

        /*Second table is start (buffer scan) state*/
        pSpec->pTableFlags[1] = MATCHENG_TABLE_TYPE_START;

        /*(Note: Flag for start table changes if search is anchored to start)*/

        /*Initialise remaining table entries to default to NO_MATCH*/
        pAct = pSpec->pTables + MATCHENG_TABLE_SIZE * 2;
        pActEnd = pAct + (pRE->NrStates - 1) * MATCHENG_TABLE_SIZE;
        while (pAct != pActEnd) {
                *pAct++ = NO_MATCH;
        }

        /*Final table (Success) ignored for now*/

        /*Initialise byte search*/
        pSpec->ByteSearchAdvance = 0;

        /*Adjustment to match used if BYTE_SEARCH looks for start anchor*/
        pSpec->StartAdjustment = 0;

        /*Initialise Endmarker/NotEndmarker actions (most remain unused)*/
        pSpec->pEndmarkerActions[0]    = ABANDON;
        pSpec->pNotEndmarkerActions[0] = ABANDON;
        for (i = 1; i < NrStates; i++) {
                pSpec->pEndmarkerActions[i]    = NO_MATCH;
                pSpec->pNotEndmarkerActions[i] = NO_MATCH;
        }

        /*Created specification successfully*/
        *ppSpec = pSpec;
        return TRUE;

} /*Create*/

/************************************************************************/
/*                                                                      */
/*      WordMetaState -- Set up entries to test word characteristics    */
/*                                                                      */
/*      Matching words can be achieved very efficiently using           */
/*      the threaded-table facility, but some analysis shows that       */
/*      there are a lot of variations on word matching:                 */
/*                                                                      */
/*      1. "\<" matches a word beginning: The previous char must        */
/*         be a nonword char and the current char must be a word        */
/*         char.  (Where "word" is defined as [A-Za-z0-9_].)            */
/*      2. "\>" matches a word end: Prev must be word and               */
/*         current must be nonword.                                     */
/*      3. "\b" matches a word boundary: Prev word and current          */
/*         nonword, or vice versa.                                      */
/*      4. "\B" matches a word NONboundary: both prev and current       */
/*         must be a word char or both must be nonword.                 */
/*      5. Specifying "-w" is not the same as adding "\<" to the        */
/*         start of the RE and "\>" to the end: Using -w means          */
/*         check that the characters adjacent to the match text         */
/*         are non-word chars without constraining the match            */
/*         text istelf.  We add a pure PREV_NONWORD state at            */
/*         start and modify the Success state at the end to only        */
/*         match nonword characters -- see Expand (below) for           */
/*         these cases.                                                 */
/*                                                                      */
/************************************************************************/
module_scope void
RETable_WordMetaState(MatchEng_Spec *pSpec, UINT StateNr, 
                      MatchEng_TableFlags Flags, 
                      MatchEng_Action NonwordAction, 
                      MatchEng_Action WordAction)
{
        UINT ByteIx;
        MatchEng_Action *pAction = &pSpec->pTables[StateNr * 
                                                  MATCHENG_TABLE_SIZE];

        /*Write flag for this state*/
        pSpec->pTableFlags[StateNr] = Flags;

        /*Set up word and nonword actions for the state*/
        for (ByteIx = 0; ByteIx < MATCHENG_TABLE_SIZE; ByteIx++) {
                /*Is this byte a word character?  (nb: ASSUMES ASCII)*/
                if (((ByteIx >= '0') && (ByteIx <= '9')) || 
                    ((ByteIx >= 'A') && (ByteIx <= 'Z')) || 
                    ((ByteIx >= 'a') && (ByteIx <= 'z')) || 
                    (ByteIx == '_')) {
                        /*Yes, current char is a word char*/
                        pAction[ByteIx] = WordAction;

                } else {
                        /*No, current char is non-word*/
                        pAction[ByteIx] = NonwordAction;
                }

        }

} /*WordMetaState*/


/************************************************************************/
/*                                                                      */
/*      AddMatchChars -- Write action for chars that match RE code      */
/*                                                                      */
/*      This function interprets what characters match the specified    */
/*      regular expression code, and changes the table entries for      */
/*      those characters to ADVANCE.  It returns a type code            */
/*      summarising table behaviour so that later users of the          */
/*      table don't need to reverse-engineer the table type.            */
/*                                                                      */
/************************************************************************/
module_scope void
RETable_AddMatchChars(MatchEng_Spec *pSpec, UINT StateNr, RE_ELEMENT **ppRE)
{
        UINT Lit;
        UINT ByteIx;
        BOOL CaseSensitive = FALSE;
        RE_ELEMENT *pRE = *ppRE;
        MatchEng_Action *pAction;

        pAction = &pSpec->pTables[StateNr * MATCHENG_TABLE_SIZE];

        /*Is this element marked as case-sensitive?*/
        if ((*pRE & REGEXP_CASE_MASK) == REGEXP_CASE_SENSITIVE) {
                /*Yes, remember to set up tables correctly*/
                CaseSensitive = TRUE;
        }

        switch (REGEXP_TYPE_R(*pRE++)) {
        case REGEXP_TYPE_MATCH_ANY:
                /*All entries except LF match*/
                RETable_SetState(pSpec, StateNr, ADVANCE, 
                                 MATCHENG_TABLE_TYPE_MATCH_ANY);
                pAction[LF] = NO_MATCH;
                break;

        case REGEXP_TYPE_CLASS:
                /*Match class: Decode bitmask specifier into table entries*/
                for (ByteIx = 0; ByteIx < MATCHENG_TABLE_SIZE; ByteIx++) {
                        if (REGEXP_BIT_TEST(pRE, ByteIx)) {
                                if (CaseSensitive) {
                                        pAction[ByteIx] = ADVANCE;
                                } else {
                                        pAction[tolower(ByteIx)] = ADVANCE;
                                        pAction[toupper(ByteIx)] = ADVANCE;
                                }
                        }
                }

                /*Set flag and move to next RE code*/
                pSpec->pTableFlags[StateNr] = MATCHENG_TABLE_TYPE_CLASS;
                pRE += REGEXP_BITMASK_NRCODES;
                break;

        case REGEXP_TYPE_CLASS_ALL_BUT:
                /*Decode bitmask specifier into table entries*/
                for (ByteIx = 0; ByteIx < MATCHENG_TABLE_SIZE; ByteIx++) {
                        if (CaseSensitive) {
                                if (! REGEXP_BIT_TEST(pRE, ByteIx)) {
                                        pAction[ByteIx] = ADVANCE;
                                }
                        } else {
                                if (! (REGEXP_BIT_TEST(pRE, 
                                                       tolower(ByteIx)) ||
                                       REGEXP_BIT_TEST(pRE, 
                                                       toupper(ByteIx)))) {
                                        pAction[ByteIx] = ADVANCE;
                                }
                        }
                }
                pSpec->pTableFlags[StateNr] = MATCHENG_TABLE_TYPE_ALL_BUT;
                pRE += REGEXP_BITMASK_NRCODES;
                break;

        case REGEXP_TYPE_LITERAL:
                /*Is the character handled directly by the tables?*/
                Lit = REGEXP_LITERAL_R(pRE[-1]);
                if (Lit >= MATCHENG_TABLE_SIZE) {
                        /*No, cannot expand*/
                        printf("?? RETable_AddMatchChars: " 
                                            "Literals >%u not handled: %u\n", 
                               MATCHENG_TABLE_SIZE, Lit);
                        break;
                }

                /*Is this search case-sensitive?*/
                if (CaseSensitive) {
                        /*Yes, merely let literal match*/
                        pAction[Lit] = ADVANCE;

                } else {
                        /*No, ensure both lower and upper case match*/
                        pAction[tolower(Lit)] = ADVANCE;
                        pAction[toupper(Lit)] = ADVANCE;
                }

                pSpec->pTableFlags[StateNr] = MATCHENG_TABLE_TYPE_LITERAL;
                break;

        case REGEXP_TYPE_DEAD_END:
                /*No characters match: fill table with ABANDON*/
                RETable_SetState(pSpec, StateNr, ABANDON, 
                                 MATCHENG_TABLE_TYPE_DEAD_END);
                break;

        case REGEXP_TYPE_WORD_BEGINNING:
                /*Matching word chars test nonword char in prev position*/
                RETable_WordMetaState(pSpec, StateNr, 
                                      MATCHENG_TABLE_TYPE_WORD_BEGINNING | 
                                              MATCHENG_TABLE_WIDTH_META, 
                                      NO_MATCH, 
                                      PREV_NONWORD);
                break;

        case REGEXP_TYPE_WORD_END:
                /*Matching nonword chars test word char in prev position*/
                RETable_WordMetaState(pSpec, StateNr, 
                                      MATCHENG_TABLE_TYPE_WORD_END | 
                                              MATCHENG_TABLE_WIDTH_META, 
                                      PREV_WORD, 
                                      NO_MATCH);
                break;

        case REGEXP_TYPE_WORD_BOUNDARY:
                /*Test word/prev nonword and nonword/prev word combinations*/
                RETable_WordMetaState(pSpec, StateNr, 
                                      MATCHENG_TABLE_TYPE_WORD_BOUNDARY | 
                                              MATCHENG_TABLE_WIDTH_META, 
                                      PREV_WORD, 
                                      PREV_NONWORD);
                break;

        case REGEXP_TYPE_WORD_NONBOUNDARY:
                /*Test word/prev word and nonword/prev nonword combinations*/
                RETable_WordMetaState(pSpec, StateNr, 
                                      MATCHENG_TABLE_TYPE_WORD_NONBOUNDARY | 
                                              MATCHENG_TABLE_WIDTH_META, 
                                      PREV_NONWORD, 
                                      PREV_WORD);
                break;

        default:
                /*Unknown RE element*/
                printf("?? RETable_AddMatchChars: Code %08lx not handled", 
                       pRE[-1]);
                break;
        }

        /*Write pointer to next code back to caller*/
        *ppRE = pRE;

} /*AddMatchChars*/


/************************************************************************/
/*                                                                      */
/*      ApplyIteration -- Apply RE iteration modifier to tables         */
/*                                                                      */
/*      Examines the iteration modifier(s) and modifies the             */
/*      current table to apply the iteration.  1-or-more iteration      */
/*      is implemented by cloning the table and then applying           */
/*      0-or-more iteration to the cloned table.                        */
/*                                                                      */
/*      The return value of the function reports how many state         */
/*      tables were added to the expanded form.                         */
/*                                                                      */
/************************************************************************/
module_scope UINT
RETable_ApplyIteration(MatchEng_Spec *pSpec, UINT StateNr, RE_ELEMENT *pRE)
{
        UINT i;
        UINT StatesAdded = 0;
        WORD RepeatSpec;
        MatchEng_Action *pAction;

        pAction = &pSpec->pTables[StateNr * MATCHENG_TABLE_SIZE];

        /*Handle repeat modifier*/
        RepeatSpec = REGEXP_REPEAT_R(*pRE);
        switch (RepeatSpec) {
        case REGEXP_REPEAT_1_OR_MORE:
                /*Duplicate details so copy may handle iteration*/
                for (i = 0; i < MATCHENG_TABLE_SIZE; i++) {
                        pAction[i + MATCHENG_TABLE_SIZE] = pAction[i];
                }
                pAction += MATCHENG_TABLE_SIZE;
                pSpec->pTableFlags[StateNr + 1] = 
                        pSpec->pTableFlags[StateNr];
                StateNr++;
                StatesAdded = 1;
                /*FALLTHROUGH*/

        case REGEXP_REPEAT_0_OR_MORE:
                /*Convert table to use 0-or-more iteration*/
                for (i = 0; i < MATCHENG_TABLE_SIZE; i++) {
                        /*Change this table action*/
                        if (*pAction == ADVANCE) {
                                *pAction++ = AGAIN_PUSH_ADVANCE;
                                
                        } else {
                                *pAction++ = BACK_AND_ADVANCE;
                                
                        }

                }

                pSpec->pTableFlags[StateNr] |= 
                        MATCHENG_TABLE_ITERATIVE | MATCHENG_TABLE_ZERO_TRIP;
                break;

        case REGEXP_REPEAT_0_OR_1:
                /*Change no-match cases to attempt zero-trip advance*/
                for (i = 0; i < MATCHENG_TABLE_SIZE; i++, pAction++) {
                        /*Change this table action*/
                        if (*pAction == NO_MATCH) {
                                *pAction = BACK_AND_ADVANCE;
                        
                        } else if (*pAction == ADVANCE) {
                                /*Matched, but also remember nonmatch case*/
                                *pAction = ADVANCE_PUSH_ZERO;

                        }

                }
                pSpec->pTableFlags[StateNr] |= MATCHENG_TABLE_ZERO_TRIP;
                break;

        case REGEXP_REPEAT_MATCH_SELF:
                /*No modifier*/
                break;

        default:
                printf("RETable_ApplyIteration: Repeat %u?\n", 
                       RepeatSpec);
                break;

        }

        return StatesAdded;

} /*ApplyIteration*/


/************************************************************************/
/*                                                                      */
/*      AnchoredToEnd -- Set up tables where RE is merely end anchor    */
/*                                                                      */
/*      For most searches anchored to the end, we merely change         */
/*      the success state so that only end-of-line characters           */
/*      are treated as success.  However, if the RE is merely           */
/*      the end anchor (e.g. /$/), then we use a tailored byte          */
/*      search to find the end of the line (including handling          */
/*      lines terminated by CR/LF if selected).                         */
/*                                                                      */
/************************************************************************/
module_scope void
RETable_AnchoredToEnd(MatchEng_Spec *pSpec)
{
        MatchEng_Action *pStart;
        UINT i;

        /*Modify start search actions to invoke EOL search*/
        RETable_SetState(pSpec, 1, END_OF_LINE_SEARCH, 
                         MATCHENG_TABLE_TYPE_START);
        pStart = &pSpec->pTables[MATCHENG_TABLE_SIZE];
        pStart[LF] = END_OF_LINE_MATCH;

        /*Have we initialised the deferred line inc table?*/
        if (! gRETable.IncLineTableInitialised) {
                /*No, initialise it now*/
                for (i = 0; i < MATCHENG_TABLE_SIZE; i++) {
                        gRETable.IncLineTable[i] = 
                                INC_LINE_COUNT;
                }
                gRETable.IncLineTableInitialised = TRUE;
        }

        /*Provide table reference for search to use*/
        pSpec->pIncLineTable = gRETable.IncLineTable;



} /*AnchoredToEnd*/


/************************************************************************/
/*                                                                      */
/*      ScanForLineStart -- Search for LF for anchored search           */
/*                                                                      */
/*      Sets up the search for searches anchored to the start           */
/*      of the line.  This entails:                                     */
/*           - knowing that buffer searches always start on a           */
/*                    line boundary,                                    */
/*           - starting on first match state since we know              */
/*                    anchor matches at buffer start,                   */
/*           - setting up the start state to become a "scan for         */
/*                    LF (line start)" state, and                       */
/*           - getting match engine to push a backtrack reference       */
/*                    pointing to start state when it starts            */
/*                    on the first match state.                         */
/*                                                                      */
/*      Since scanning for LF is very much like the BYTE_SEARCH         */
/*      operation, we use that action -- but with the following         */
/*      modifications:                                                  */
/*           - We advance 1 table instead of 2 when byte found.         */
/*           - The trailing literal for the byte search is LF.          */
/*           - We adjust any completed match to remove the LF           */
/*                  we found at the start of the RE.                    */
/*                                                                      */
/************************************************************************/
module_scope void
RETable_ScanForLineStart(MatchEng_Spec *pSpec)
{
        /*All chars except LF trigger search for LF*/
        RETable_SetState(pSpec, 1, START_OF_LINE_SEARCH, 
                         MATCHENG_TABLE_TYPE_START_LINE);
        pSpec->pTables[MATCHENG_TABLE_SIZE + LF] = CHECK_BUFFER;
        pSpec->pEndmarkerActions[1]    = ABANDON;
        pSpec->pNotEndmarkerActions[1] = FOUND_LINE_START;
        pSpec->TrailingLiteral = LF;
        pSpec->EndCondition = MATCHENG_CONDITION_TRAILING_LITERAL;
        pSpec->PatternLength = 1;
        pSpec->ByteSearchAdvance = 1;
        pSpec->StartTableNr = 1;
        pSpec->StartAdjustment = 1;

        /*SIGH: Non-zero offset used by match to indicate skip search*/
        pSpec->SearchSkipOffset = 1;

} /*ScanForLineStart*/


/************************************************************************/
/*                                                                      */
/*      AnalyseStart -- Look for easy(ish) components at start of RE    */
/*                                                                      */
/*      This function inspects the start of the table version of        */
/*      the regular expression, and counts how many elements can        */
/*      be scanned using skip-style search techniques such as           */
/*      the Boyer-Moore algorithm or the CPU string search              */
/*      instruction.  The analysis is used to help decide how to        */
/*      scan the buffer for potential RE match positions.               */
/*                                                                      */
/************************************************************************/
module_scope void
RETable_AnalyseStart(MatchEng_Spec *pSpec, UINT *pNrEasyStates, 
                     BOOL *pFirstIsLiteral, BYTE *pFirstLiteral)
{
        UINT States;
        MatchEng_TableFlags *pFlags;
        MatchEng_Action *pAction;
        UINT Entry;
        UINT MatchEntry = 0;
        UINT Matches;

        /*Start analysis with first match state (skip fail and start tables)*/
        pFlags  = &pSpec->pTableFlags[pSpec->FirstRealStateNr];
        States = 0;

        /*Use summary of search contained in state flags for analysis*/
        for (; ; pFlags++, States++) {
                /*Does this table contain nonlinear effects?*/
                if (*pFlags & (MATCHENG_TABLE_ITERATIVE | 
                               MATCHENG_TABLE_ZERO_TRIP)) {
                        /*Sorry, state table needs > 1 second's thought*/
                        break;

                }

                /*Is this table worth optimising?*/
                switch (MATCHENG_TABLE_TYPE_R(*pFlags)) {
                case MATCHENG_TABLE_TYPE_LITERAL:
                case MATCHENG_TABLE_TYPE_CLASS:
                        /*These are okay (but what if class matches lots?)*/
                        continue;

                }

                /*Any table which gets us here is NOT okay: finish summary*/
                break;

        }

        /*Count how many entries in the first RE state match*/
        Matches = 0;
        pAction = &pSpec->pTables[pSpec->FirstRealStateNr * 
                                  MATCHENG_TABLE_SIZE];
        for (Entry = 0; Entry < MATCHENG_TABLE_SIZE; Entry++) {
                if (pAction[Entry] != NO_MATCH) {
                        /*Count match and remember it in case there's only 1*/
                        Matches++;
                        MatchEntry = Entry;
                }
        }

        /*Did the first state only match one character?*/
        *pFirstIsLiteral = FALSE;
        if (Matches == 1) {
                /*Yes, tell client of opportunity for first table*/
                *pFirstIsLiteral = TRUE;
                *pFirstLiteral   = (BYTE) MatchEntry;
        }

        /*Report how many easy(ish) states we found*/
        *pNrEasyStates = States;

} /*AnalyseStart*/


/************************************************************************/
/*                                                                      */
/*      ScanByBoyerMoore -- Set up start state to use BM                */
/*                                                                      */
/*      This function sets up the starting state to use a Boyer-        */
/*      Moore style string search algorithm.  It is more powerful       */
/*      than most simpleminded BM searches as the string may            */
/*      include more sophisticated components such as classes           */
/*      and/or case insensitive literals.                               */
/*                                                                      */
/*      The starting table is set up by looking at the rightmost        */
/*      table of the string and working backwards towards the           */
/*      start of the RE.  If the last table matches, it triggers        */
/*      a match attempt; otherwise, we count how many preceding         */
/*      states don't match, and add a skip action for that number       */
/*      of states to the starting table.                                */
/*                                                                      */
/*      The match could be optimised further if we started              */
/*      rearranging the RE match tables, but this is too complex        */
/*      just now.  Optimisations could include not retesting the        */
/*      last character of the string during the match, and              */
/*      using a (self-tuning?) guard test.  Further optimisations       */
/*      could be added if we were confident about the input             */
/*      characteristics (e.g. shorten string if last component          */
/*      becomes substantially less likely to match).                    */
/*                                                                      */
/************************************************************************/
module_scope void
RETable_ScanByBoyerMoore(MatchEng_Spec *pSpec, UINT NrStates)
{
        MatchEng_Action *pLast;
        MatchEng_Action *pStart;
        MatchEng_Action *pSearch;
        MatchEng_Action *pFirstReal;
        UINT i;
        UINT SkipSize;
        UINT Endmarker;

        /*Set up to scan state tables and to build starting state*/
        pStart = &pSpec->pTables[MATCHENG_TABLE_SIZE];
        pFirstReal = &pSpec->pTables[pSpec->FirstRealStateNr * 
                                    MATCHENG_TABLE_SIZE];
        pLast = &pSpec->pTables[(NrStates + pSpec->FirstRealStateNr - 1) * 
                                MATCHENG_TABLE_SIZE];
        pSpec->SearchSkipOffset = NrStates - 1;

        /*Loop through each character of table, looking for improvements*/
        for (i = 0; i < MATCHENG_TABLE_SIZE; i++, pLast++, pStart++) {
                /*Does this character match in the last state of the string?*/
                if (*pLast != NO_MATCH) {
                        /*Yes, we pause scanning and attempt match*/
                        *pStart = START_OFFSET_MATCH;

                        /*Note char as terminator for search*/
                        pSpec->TrailingLiteral = (BYTE) i;

                        continue;
                }

                /*Find out how far non-match activity extends back*/
                pSearch = pLast - MATCHENG_TABLE_SIZE;
                *pStart = ADVANCE;
                for (;;) {
                        /*Does this character not match in this table?*/
                        if (*pSearch != NO_MATCH) {
                                /*No, finished looking*/
                                break;
                        }

                        /*Move to the next previous table*/
                        pSearch -= MATCHENG_TABLE_SIZE;

                }

                /*Work out how many characters can be skipped*/
                SkipSize = (UINT) ((pLast - pSearch) / 
                                            MATCHENG_TABLE_SIZE);

                /*Note: Can "see" more skip states if meta tables present*/

                /*Is this skip the maximum possible?*/
                if (SkipSize >= NrStates) {
                        /*Yes, use efficient action*/
                        *pStart = SKIP_MAX;
                        continue;
                }

                /*Write search action based on survey*/
                switch (SkipSize) {
                case  1: *pStart = AGAIN; break;
                case  2: *pStart = SKIP2;  break;
                case  3: *pStart = SKIP3;  break;
                case  4: *pStart = SKIP4;  break;
                case  5: *pStart = SKIP5;  break;
                case  6: *pStart = SKIP6;  break;
                case  7: *pStart = SKIP7;  break;
                case  8: *pStart = SKIP8;  break;
                case  9: *pStart = SKIP9;  break;
                case 10: *pStart = SKIP10; break;
                case 11: *pStart = SKIP11; break;
                case 12: *pStart = SKIP12; break;
                case 13: *pStart = SKIP13; break;
                case 14: *pStart = SKIP14; break;
                case 15: *pStart = SKIP15; break;
                case 16: *pStart = SKIP16; break;
                case 17:
                default:
                         *pStart = SKIP17; break;

                }

        }

        /*Use endmarker to terminate buffer search*/
        Endmarker = MATCHENG_SPEC_ENDMARKER_R(pSpec->SpecFlags);
        pSpec->EndCondition = MATCHENG_CONDITION_TRAILING_LITERAL;
        pSpec->PatternLength = NrStates;
        pSpec->TrailingLiteral = Endmarker;
        pStart = &pSpec->pTables[MATCHENG_TABLE_SIZE + Endmarker];
        pSpec->pEndmarkerActions[1] = ABANDON;
        pSpec->pNotEndmarkerActions[1] = *pStart;
        *pStart = CHECK_BUFFER;

} /*ScanByBoyerMoore*/


/************************************************************************/
/*                                                                      */
/*      ScanByByteSearch -- Set up start state to use CPU instruction   */
/*                                                                      */
/*      Although the threaded assembly technique used in the            */
/*      table-driven engine is extremely fast, it can't scan for        */
/*      bytes as quickly as the CPU byte search instruction.            */
/*      This function sets up the starting state to scan using          */
/*      the CPU instruction.                                            */
/*                                                                      */
/*      Parameter OptimiseFirst turns on an extra optimisation          */
/*      since matching a literal in the starting state often            */
/*      means we can take matching in the first state for               */
/*      granted.  The switch is needed where we need to perform         */
/*      meta-tests -- such as testing for word boundaries (-w           */
/*      switch or \< at the start of the RE).  In this case, we         */
/*      mustn't optimise away the match on the first state.             */
/*                                                                      */
/*                                                                      */
/************************************************************************/
module_scope void
RETable_ScanByByteSearch(MatchEng_Spec *pSpec, BYTE FirstLiteral, 
                         BOOL OptimiseFirst)
{
        MatchEng_Action *pStart;

        /*All entries except literal trigger CPU search instruction*/
        RETable_SetState(pSpec, 1, BYTE_SEARCH, MATCHENG_TABLE_TYPE_START);
        pStart = &pSpec->pTables[MATCHENG_TABLE_SIZE];
        pSpec->TrailingLiteral = FirstLiteral;
        pSpec->EndCondition = MATCHENG_CONDITION_TRAILING_LITERAL;
        pSpec->PatternLength = 1;

        /*Can we assume the start match fulfils the first state's match?*/
        if (OptimiseFirst) {
                /*Yes, get RE engine to advance to next state immediately*/
                pSpec->ByteSearchAdvance = 2;
                pStart[FirstLiteral] = START_MATCH_PUSH_ADVANCE;
        } else {
                /*No, ensure we evaluate the first state next*/
                pSpec->ByteSearchAdvance = 1;
                pStart[FirstLiteral] = START_MATCH_PUSH;
        }

        /*SIGH: Non-zero offset used by match to indicate skip search*/
        pSpec->SearchSkipOffset = 1;

        /*Are we scanning for LF (e.g. "ggrep -w $")?*/
        if (FirstLiteral == LF) {
                /*Yes, need more care at the end of the buffer*/
                pSpec->pEndmarkerActions[1] = ABANDON;
                pSpec->pNotEndmarkerActions[1] = pStart[LF];
                pStart[LF] = CHECK_BUFFER;
        }

} /*ScanByByteSearch*/


/************************************************************************/
/*                                                                      */
/*      ScanSequentially -- Set up start state to scan buffer           */
/*                                                                      */
/************************************************************************/
module_scope void
RETable_ScanSequentially(MatchEng_Spec *pSpec)
{
        UINT i;
        MatchEng_Action *pStart;
        MatchEng_Action *pFirstRealRE;
        UINT Endmarker;

        /*Default to skipping past nonmatch bytes*/
        RETable_SetState(pSpec, 1, AGAIN, MATCHENG_TABLE_TYPE_START);

        /*Start match wherever first RE state matches*/
        pStart = &pSpec->pTables[MATCHENG_TABLE_SIZE];
        pFirstRealRE = &pSpec->pTables[pSpec->FirstRealStateNr * 
                                   MATCHENG_TABLE_SIZE];

        /*Write actions for RE start search*/
        for (i = 0; i < MATCHENG_TABLE_SIZE; i++, pStart++, pFirstRealRE++) {
                /*Does the following table entry simply advance?*/
                if (pStart[MATCHENG_TABLE_SIZE] == ADVANCE) {
                        /*Yes, optimise RE match start to immediately advance*/
                        *pStart = START_MATCH_PUSH_ADVANCE;
                        continue;
                }

                /*Does the first real state match this byte?*/
                if ((*pFirstRealRE != NO_MATCH) && 
                                (*pFirstRealRE != ABANDON)) {
                        /*Yes, this entry triggers a match attempt*/
                        *pStart = START_MATCH_PUSH;

                }

        }

        /*Have we been asked to count lines?*/
        pStart = &pSpec->pTables[MATCHENG_TABLE_SIZE];
        if (pSpec->SpecFlags & MATCHENG_SPEC_COUNT_LINES) {
                /*Yes, does LF start a match?*/
                if (( pStart[LF] == START_MATCH_PUSH) || 
                    (pStart[LF] == START_MATCH_PUSH_ADVANCE)) {
                        /*Yes, note LF and start match*/
                        pStart[LF] = NOTE_LINE_START_PUSH;

                        /*Have we initialised the deferred line inc table?*/
                        if (! gRETable.IncLineTableInitialised) {
                                /*No, initialise it now*/
                                for (i = 0; i < MATCHENG_TABLE_SIZE; i++) {
                                        gRETable.IncLineTable[i] = 
                                                INC_LINE_COUNT;
                                }
                                gRETable.IncLineTableInitialised = TRUE;
                        }

                        /*Provide table reference for search to use*/
                        pSpec->pIncLineTable = gRETable.IncLineTable;

                } else {
                        /*No, simply count the LF and keep scanning*/
                        pStart[LF] = NOTE_LINE;
                }
        }

        /*Use endmarker to delimit buffer*/
        Endmarker = MATCHENG_SPEC_ENDMARKER_R(pSpec->SpecFlags);

        /*Does LF match in the starting state?*/
        if (pStart[LF] != AGAIN) {
                /*Yes, we must be careful (but slower): look for appended LF*/
                Endmarker = LF;
        }

        /*Set up start state to terminate search when endmarker found*/
        pSpec->pEndmarkerActions[1] = ABANDON;
        pSpec->pNotEndmarkerActions[1] = pStart[Endmarker];
        pStart[Endmarker] = CHECK_BUFFER;
        pSpec->TrailingLiteral = Endmarker;
        pSpec->EndCondition = MATCHENG_CONDITION_TRAILING_LITERAL;
        pSpec->PatternLength = 1;

        /*Use opt flag (hack) to get match engine to find line start*/
        pSpec->SearchSkipOffset = 1;

} /*ScanSequentially*/


/************************************************************************/
/*                                                                      */
/*      PrepareStart -- Set up state to scan buffer for match           */
/*                                                                      */
/*      We always scan the file as a buffer -- assuming that lines      */
/*      are terminated by LF (and, optionally, by CR/LF).  The          */
/*      "start" state is the state that scans the buffer looking        */
/*      for worthwhile positions to commence a match attempt.           */
/*      Other functions (such as line counting) may be implemented      */
/*      in this state.                                                  */
/*                                                                      */
/*      The key to the speed of the search is the speed with            */
/*      which you can skip over lines that don't match.  In             */
/*      particular, this means trying to use clever algorithms          */
/*      such as the Boyer-Moore string search algorithm, or by          */
/*      applying brute force techniques such as the processor's         */
/*      string search instruction.  This function analyses the          */
/*      start of the regular expression and selects the buffer          */
/*      scan technique that seems the best bet (based on the            */
/*      nature of the RE and possibly some guesses as to the            */
/*      likely nature of the data being searched).                      */
/*                                                                      */
/*      Note that we assume that the RE's easiest part to search        */
/*      is at the start of the RE -- that the caller has                */
/*      rearranged the search to ensure the easiest bit to find         */
/*      is searched first.                                              */
/*                                                                      */
/************************************************************************/
module_scope void
RETable_PrepareStart(MatchEng_Spec *pSpec)
{
        UINT NrSkipStates;
        BOOL FirstIsLiteral;
        BYTE FirstLiteral;

        /*Is the first state a dead-end state?*/
        if (MATCHENG_TABLE_TYPE_R(pSpec->pTableFlags[2]) ==
            MATCHENG_TABLE_TYPE_DEAD_END) {
                /*Yes, make starting state abandon instantly*/
                RETable_SetState(pSpec, 1, 
                                 ABANDON, 
                                 MATCHENG_TABLE_TYPE_START);
                return;
        }

        /*Is the search anchored to the start of the RE?*/
        if (pSpec->pTableFlags[1] == MATCHENG_TABLE_TYPE_START_LINE) {
                /*Yes, set up start table to search for LF as line start*/
                RETable_ScanForLineStart(pSpec);
                return;
        }

        /*Find out if starting states are amenable to skip-style searching*/
        NrSkipStates = 0;
        FirstIsLiteral = FALSE;
        if (pSpec->SpecFlags & MATCHENG_SPEC_SKIP_BYTES) {
                RETable_AnalyseStart(pSpec, &NrSkipStates, 
                                     &FirstIsLiteral, &FirstLiteral);
        }

        /*Set up start state to scan buffer based on analysis*/
        if (FirstIsLiteral && (NrSkipStates < 3)) {
                /*Literal: Does the nest state contain the literal match?*/
                if (pSpec->FirstRealStateNr == 2) {
                        /*Yes, scan buffer using CPU's byte search instr*/
                        RETable_ScanByByteSearch(pSpec, FirstLiteral, TRUE);
                } else {
                        /*No, scan with CPU instr but don't skip next state*/
                        RETable_ScanByByteSearch(pSpec, FirstLiteral, FALSE);
                }

        } else if (NrSkipStates >= 2) {
                /*Scan buffer using table-driven version of Boyer-Moore*/
                RETable_ScanByBoyerMoore(pSpec, NrSkipStates);
                pSpec->EndCondition = MATCHENG_CONDITION_TRAILING_LITERAL;

        } else {
                /*Slog through buffer as best we can*/
                RETable_ScanSequentially(pSpec);
        }

} /*PrepareStart*/


/************************************************************************/
/*                                                                      */
/*      IterationOpt -- Use following table to optimise backtracking    */
/*                                                                      */
/*      Each time an iterative part of the RE is reached, it is         */
/*      matched as far as possible, and then matching the rest of       */
/*      the expression is attempted.  If the next match fails, the      */
/*      last iteration is undone and matching the remainder is          */
/*      attempted again.  This is powerful but expensive.               */
/*                                                                      */
/*      IterationOpt finds significant savings by reducing the number   */
/*      of backtracking pathways that need to be considered.            */
/*      This is done by observing where the following table does        */
/*      not match, and editing the actions in the iteration table       */
/*      to not bother pushing a backtracking path for those bytes.      */
/*      For example, "fred.*bloggs" can be improved since we can        */
/*      see that during the ".*" iteration, it is only worth            */
/*      attempting to backtrack if the character was "b".               */
/*                                                                      */
/*      The optimisation is very easy to implement: for each byte       */
/*      of the input, if the iteration table matches with               */
/*      AGAIN_PUSH_ADVANCE but the following table has NO_MATCH,        */
/*      the backtracking push is worthless, so change the entry         */
/*      in the iteration table to AGAIN.                                */
/*                                                                      */
/************************************************************************/
module_scope void
RETable_IterationOpt(MatchEng_Action *pTable)
{
        UINT i;
        MatchEng_Action *pNext;

        /*Check each iteration table entry against next table*/
        pNext = pTable + MATCHENG_TABLE_SIZE;
        for (i = 0; i < MATCHENG_TABLE_SIZE; i++, pTable++, pNext++) {
                /*Does the current table entry include backtracking?*/
                if (*pTable != AGAIN_PUSH_ADVANCE) {
                        /*No, ignore entry*/
                        continue;
                }

                /*Does this char in the next table represent a dead-end?*/
                if (*pNext != NO_MATCH) {
                        /*No, optimisation is not possible*/
                        continue;
                }

                /*Change iteration to not bother backtracking*/
                *pTable = AGAIN;

        }

} /*IterationOpt*/


/************************************************************************/
/*                                                                      */
/*      ZeroTripOpt -- Use following table to optimise backtracking     */
/*                                                                      */
/*      Similar to the iteration optimisation above, this               */
/*      function seeks to reduce the effort expended by tables          */
/*      containing zero trip cases where the next table doesn't         */
/*      match.  In these cases, BACK_AND_ADVANCE becomes                */
/*      NO_MATCH, and ADVANCE_PUSH_ZERO becomes ADVANCE.                */
/*                                                                      */
/************************************************************************/
module_scope void
RETable_ZeroTripOpt(MatchEng_Action *pTable)
{
        UINT i;
        MatchEng_Action *pNext;

        /*Check each iteration table entry against next table*/
        pNext = pTable + MATCHENG_TABLE_SIZE;
        for (i = 0; i < MATCHENG_TABLE_SIZE; i++, pTable++, pNext++) {

                /*Does this char in the next table represent a dead-end?*/
                if (*pNext != NO_MATCH) {
                        /*No, optimisation is not possible*/
                        continue;
                }

                /*Optimise entry if it tries to advance*/
                if (*pTable == BACK_AND_ADVANCE) {
                        *pTable = NO_MATCH;

                } else if (*pTable == ADVANCE_PUSH_ZERO) {
                        *pTable = ADVANCE;
                }

        }

} /*ZeroTripOpt*/


/************************************************************************/
/*                                                                      */
/*      MetaMatchOpt -- Optimise zero-with (meta) tables                */
/*                                                                      */
/*      A meta-table implements a test without advancing the match      */
/*      position.  Therefore, any character that doesn't match in       */
/*      the next state also doesn't match in this state.                */
/*      This function applies this optimisation to the meta table.      */
/*                                                                      */
/************************************************************************/
module_scope void
RETable_MetaMatchOpt(MatchEng_Action *pTable)
{
        UINT i;
        MatchEng_Action *pNext;

        /*Check each meta table entry against next table*/
        pNext = pTable + MATCHENG_TABLE_SIZE;
        for (i = 0; i < MATCHENG_TABLE_SIZE; i++, pTable++, pNext++) {
                /*Does this char in the next table represent a dead-end?*/
                if (*pNext != NO_MATCH) {
                        /*No, optimisation is not possible*/
                        continue;
                }

                /*Since next can't match, no point in our matching, either*/
                *pTable = NO_MATCH;

        }

} /*MetaMatchOpt*/


/************************************************************************/
/*                                                                      */
/*      TrimWordAdvance -- Trim table since next comes a word test      */
/*                                                                      */
/*      In the case of (say) "[0-z]\<", we can see that the word        */
/*      boundary test reduces the set of matching characters in the     */
/*      preceding class.  This function optimises this case by          */
/*      rewriting actions that advance but we know won't match to       */
/*      non-match actions.                                              */
/*                                                                      */
/************************************************************************/
module_scope void
RETable_TrimWordAdvance(MatchEng_Action *pTable, BOOL NextMatchesWord)
{
        UINT i;
        BOOL IsWord;

        /*Check each meta table entry against next table*/
        for (i = 0; i < MATCHENG_TABLE_SIZE; i++, pTable++) {
                /*Decide whether this entry is a word char*/
                IsWord = isalnum(i) || (i == (UINT) '_');

                /*Would this char match in the next state?*/
                if (IsWord == NextMatchesWord) {
                        /*Yes, don't change its action*/
                        continue;
                }

                /*Change action if it tries to advance*/
                if (*pTable == ADVANCE) {
                        *pTable = NO_MATCH;
                }

        }

} /*TrimWordAdvance*/


/************************************************************************/
/*                                                                      */
/*      ZeroTripOptEnd -- Eliminate redundancy where z-trip at end      */
/*                                                                      */
/*      For each entry where the next table has COMPLETED, change       */
/*      ADVANCE_PUSH_ZERO to ADVANCE.  Not a really big win, and        */
/*      it's possible that this optimisation costs more to              */
/*      perform than it saves, but included anyway as it will           */
/*      certainly be worthwhile on files with many matches.             */
/*                                                                      */
/*      Only use this where the final state has COMPLETED for           */
/*      all actions -- otherwise the zero trip may be required.         */
/*      In particular, do not call this routine if the final state      */
/*      has been trimmed to match an end-of-word boundary.              */
/*                                                                      */
/************************************************************************/
module_scope void
RETable_ZeroTripOptEnd(MatchEng_Action *pTable)
{
        UINT i;

        /*Change all ADVANCE_PUSH_ZERO actions to ADVANCE*/
        for (i = 0; i < MATCHENG_TABLE_SIZE; i++, pTable++) {
                /*Is this item an optional element at the end of the RE?*/
                if (*pTable == ADVANCE_PUSH_ZERO) {
                        /*Yes, no point in pushing backtrack*/
                        *pTable = ADVANCE;
                        continue;
                }
        }

} /*ZeroTripOptEnd*/


/************************************************************************/
/*                                                                      */
/*      OptimiseTables -- Optimise by comparing table activities        */
/*                                                                      */
/*      This function works through the table-driven version of         */
/*      the RE from last state to first, looking for relationships      */
/*      between tables that allow work to be eliminated.  Most          */
/*      importantly, in expressions like x.*y, the .* state             */
/*      doesn't need to store any backtracking context unless the       */
/*      character matched is a y.  The last-to-first order may          */
/*      let some optimisations ripple up through multiple tables.       */
/*                                                                      */
/*      This function also helps implement the word search              */
/*      function, if specified, by trimming each table so that          */
/*      only word characters match.                                     */
/*                                                                      */
/************************************************************************/
module_scope void
RETable_OptimiseTables(MatchEng_Spec *pSpec)
{
        MatchEng_TableFlags *pFlags;
        MatchEng_Action *pTable;

        pFlags = &pSpec->pTableFlags[pSpec->NrTables];
        pTable = &pSpec->pTables[pSpec->NrTables * MATCHENG_TABLE_SIZE];

        /*Does final table before success table contain zero trip?*/
        pFlags--;
        pTable -= MATCHENG_TABLE_SIZE;
        if ((*pFlags & MATCHENG_TABLE_ZERO_TRIP) && 
                         (MATCHENG_TABLE_TYPE_R(pFlags[1]) == 
                                    MATCHENG_TABLE_TYPE_SUCCESS)) {
                /*Yes, optimise redundant final push where possible*/
                RETable_ZeroTripOptEnd(pTable);
        }

        /*Post-process tables in last-to-first order*/
        for (; pFlags > &pSpec->pTableFlags[1];
                        pFlags--, pTable -= MATCHENG_TABLE_SIZE) {
                /*Did this table contain iteration?*/
                if (*pFlags & MATCHENG_TABLE_ITERATIVE) {
                        /*Yes, optimise iteration backtracking*/
                        RETable_IterationOpt(pTable);
                }

                /*Did this table contain an optional (zero-trip) match?*/
                if (*pFlags & MATCHENG_TABLE_ZERO_TRIP) {
                        /*Yes, optimise backtracking as per iteration*/
                        RETable_ZeroTripOpt(pTable);
                }

                /*Does this table implement a meta-match (doesn't advance)?*/
                if (*pFlags & MATCHENG_TABLE_WIDTH_META) {
                        /*Yes, optimise nonmatches from next state*/
                        RETable_MetaMatchOpt(pTable);

                        /*Look for additional optimisations if word begin/end*/
                        switch (MATCHENG_TABLE_TYPE_R(*pFlags)) {
                        case MATCHENG_TABLE_TYPE_WORD_BEGINNING:
                                RETable_TrimWordAdvance(pTable - 
                                                        MATCHENG_TABLE_SIZE, 
                                                        FALSE);
                                break;

                        case MATCHENG_TABLE_TYPE_WORD_END:
                                RETable_TrimWordAdvance(pTable - 
                                                        MATCHENG_TABLE_SIZE, 
                                                        TRUE);
                                break;

                        default:
                                break;

                        }

                }

        }

} /*OptimiseTables*/


/************************************************************************/
/*                                                                      */
/*      WordFinalState -- Prepare success state for word match          */
/*                                                                      */
/*      Sets up the "success" state so that we only match if            */
/*      character immediately after matched text is a nonword           */
/*      character.                                                      */
/*                                                                      */
/************************************************************************/
module_scope void
RETable_WordFinalState(MatchEng_Spec *pSpec, UINT StateNr)
{
        UINT Ch;
        MatchEng_Action *pAction;

        /*Work through each character of table*/
        pAction = &pSpec->pTables[StateNr * MATCHENG_TABLE_SIZE];
        pSpec->pTableFlags[StateNr] = MATCHENG_TABLE_TYPE_SUCCESS_WORD_END;
        for (Ch = 0; Ch < MATCHENG_TABLE_SIZE; Ch++, pAction++) {
                /*Is this character a nonword?*/
                if (! IS_WORD(Ch)) {
                        /*Yes, it completes match satisfactorily*/
                        *pAction = COMPLETED;
                } else {
                        /*Sorry, match failed*/
                        *pAction = NO_MATCH;
                }
        }

} /*WordFinalState*/


/************************************************************************/
/*                                                                      */
/*      CopyLFtoCR -- Make CR serve as a line terminator                */
/*                                                                      */
/*      Copies the actions for LF in each state to CR.  This is         */
/*      so that we can treat lines that end with CR/LF equally to       */
/*      lines that end with LF.  However, note that our treatment       */
/*      isn't perfect: we don't check that an LF follows the CR.        */
/*      This can be fixed by changing the CR action into a              */
/*      CHECK_FOR_LF action, but behoffski's very weary at the          */
/*      moment and is too tired to go off and do this.  (You need       */
/*      to save the action replaced by CR for execution in case         */
/*      the next character isn't a LF.)                                 */
/*                                                                      */
/************************************************************************/
module_scope void
RETable_CopyLFtoCR(MatchEng_Spec *pSpec)
{
        MatchEng_Action *pTable;
        MatchEng_Action *pTableEnd;

        pTable = &pSpec->pTables[2 * MATCHENG_TABLE_SIZE];
        pTableEnd = &pSpec->pTables[(pSpec->NrTables + 1) * 
                                   MATCHENG_TABLE_SIZE];

        /*Post-process tables in last-to-first order*/
        for (; pTable != pTableEnd; pTable += MATCHENG_TABLE_SIZE) {
                /*Copy LF action to CR in this table*/
                pTable[CR] = pTable[LF];
        }

} /*CopyLFtoCR*/


/************************************************************************/
/*                                                                      */
/*      Expand -- Convert "compact" RE codes to optimised table form    */
/*                                                                      */
/*      This function is used to prepare for table-driven pattern       */
/*      match searching.  It creates a fairly verbose form of the       */
/*      regular expression which is highly optimised for match          */
/*      searches.  A typical pattern will occupy 8 to 20 kbytes         */
/*      of RAM in its expanded form.                                    */
/*                                                                      */
/*      Flags allows configuration of RE interpretation,                */
/*      including selecting whether CR/LF is a line terminator.         */
/*      The flags also provide control over other match engine          */
/*      behaviours, such as line counting.  See definition of           */
/*      flag bits in matcheng.h.                                        */
/*                                                                      */
/*      The function returns FALSE if unable to prepare an              */
/*      expanded version of the RE.                                     */
/*                                                                      */
/************************************************************************/
public_scope BOOL
RETable_Expand(RegExp_Specification *pRESpec, 
               LWORD Flags, 
               MatchEng_Spec **ppSpec)
{
        MatchEng_Spec *pSpec;
        RE_ELEMENT *pRE;
        RE_ELEMENT *pRESave;
        UINT StateNr;
        BOOL AnchoredToEnd = FALSE;
        BOOL AnchoredToStart = FALSE;

        /*Check arguments*/
        if (pRESpec == NULL) {
                return FALSE;
        }

        /*Destroy return value to reduce chances of being misinterpreted*/
        *ppSpec = (MatchEng_Spec *) NIL;

        /*Acquire resources (e.g. memory) for optimised search*/
        if (! RETable_Create(pRESpec, &pSpec)) {
                /*Sorry, unable to get required resources*/
                return FALSE;
        }

        /*Record specification flags supplied by caller*/
        pSpec->SpecFlags = Flags;

        /*Work through RE specification, creating tables as we go*/
        pRE = pRESpec->CodeList;
        StateNr = 2;

        /*?? HACK? ALERT: We test FirstRealStateNr == 2 if byte search*/

        /*Do we match a word beginning at the start of the RE?*/
        if (REGEXP_TYPE_R(*pRE) == REGEXP_TYPE_WORD_PREV_NONWORD) {
                /*Yes, add state to test for this case*/
                RETable_WordMetaState(pSpec, 
                                 StateNr, 
                                 MATCHENG_TABLE_TYPE_PREV_NONWORD | 
                                      MATCHENG_TABLE_WIDTH_META, 
                                 PREV_NONWORD, 
                                 PREV_NONWORD);

                StateNr++;
                pRE++;
        }

        /*Remember first "real" state after meta-states*/
        pSpec->FirstRealStateNr = StateNr;

        for (;;) {
                switch (REGEXP_TYPE_R(*pRE)) {
                case REGEXP_TYPE_END_OF_EXPR:
                        /*Finished expanding*/
                        break;

                case REGEXP_TYPE_ANCHOR_START:
                        /*Set start table flag now as a signal for later*/
                        pSpec->pTableFlags[1] = 
                                MATCHENG_TABLE_TYPE_START_LINE;
                        AnchoredToStart = TRUE;
                        pRE++;
                        continue;

                case REGEXP_TYPE_ANCHOR_END:
                        /*Are all remaining RE elements optional?*/
                        if (! RegExp_AllOptional(pRE + 1)) {
                                /*No, can't match: destroy start and finish*/
                                RETable_SetState(pSpec, 
                                                 1, 
                                                 ABANDON, 
                                                 MATCHENG_TABLE_TYPE_START);
                                *ppSpec = pSpec;
                                return TRUE;
                        }

                        /*Search anchored to end: only EOL succeeds*/
                        AnchoredToEnd = TRUE;
                        RETable_SetState(pSpec, 
                                         StateNr, 
                                         NO_MATCH, 
                                         MATCHENG_TABLE_TYPE_SUCCESS);
                        pSpec->pTables[StateNr * 
                                      MATCHENG_TABLE_SIZE + LF] = 
                                COMPLETED;

                        goto FinishedExpanding;

                        break;

                case REGEXP_TYPE_WORD_BEGINNING:
                case REGEXP_TYPE_WORD_END:
                case REGEXP_TYPE_WORD_BOUNDARY:
                case REGEXP_TYPE_WORD_NONBOUNDARY:
                        /*Meta-match: don't expand if it is optional*/
                        switch (REGEXP_REPEAT_R(*pRE)) {
                        case REGEXP_REPEAT_0_OR_1:
                        case REGEXP_REPEAT_0_OR_MORE:
                                pRE++;
                                continue;
                        }

                        /*Does this meta-match occur at the start of the RE?*/
                        if (StateNr == pSpec->FirstRealStateNr) {
                                /*Yes, note leading meta-states*/
                                pSpec->FirstRealStateNr++;
                        }

                        /*FALLTHROUGH*/
                        
                default:
                        /*Handle specifier and any iterative modifiers*/
                        pRESave = pRE;
                        RETable_AddMatchChars(pSpec, StateNr, &pRE);
                        StateNr += RETable_ApplyIteration(pSpec, 
                                                          StateNr, 
                                                          pRESave);

                        /*Note: ApplyIteration might need to advance pRE*/
                        /*      when {n, m} iteration supported*/

                        StateNr++;
                        continue;

                }

                /*If we reach here, we've finished expanding RE*/
                break;

        }

        /*Note: end-anchored searches don't execute the following paragraph:*/

        /*Did the end-of-RE marker indicate nonword match?*/
        if (REGEXP_WORD_R(*pRE) == REGEXP_WORD_NONWORD_CHARS) {
                /*Yes, set last state so only nonword chars match*/
                RETable_WordFinalState(pSpec, StateNr);

        } else {
                /*No, any final character matches*/
                RETable_SetState(pSpec, 
                                 StateNr, 
                                 COMPLETED, 
                                 MATCHENG_TABLE_TYPE_SUCCESS);
        }


FinishedExpanding:

        /*Remember how many tables are in specification*/
        pSpec->NrTables = StateNr;

        /*Are we to treat CR/LF as line terminator as well as CR?*/
        if (Flags & MATCHENG_SPEC_CR_IS_TERMINATOR) {
                /*Yes, copy LF actions to CR in all tables*/
                RETable_CopyLFtoCR(pSpec);
        }

        /*Look for relationships between tables that we may optimise*/
        RETable_OptimiseTables(pSpec);

        /*Prepare starting state to scan buffer for RE start*/
        RETable_PrepareStart(pSpec);

        /*Was the RE merely an end-of-line anchor?*/
        if (AnchoredToEnd && 
            (StateNr == pSpec->FirstRealStateNr) && 
            (! AnchoredToStart)) {
                /*Yes, modify start+success states to match correctly*/
                RETable_AnchoredToEnd(pSpec);
        }

        /*Write optimised match specification to caller and report success*/
        *ppSpec = pSpec;
        return TRUE;

} /*Expand*/