(root)/
gcc-13.2.0/
libphobos/
src/
std/
regex/
internal/
thompson.d
//Written in the D programming language
/*
    Implementation of Thompson NFA std.regex engine.
    Key point is evaluation of all possible threads (state) at each step
    in a breadth-first manner, thereby geting some nice properties:
        - looking at each character only once
        - merging of equivalent threads, that gives matching process linear time complexity
*/
module std.regex.internal.thompson;

package(std.regex):

import std.range.primitives;
import std.regex.internal.ir;

//State of VM thread
struct Thread(DataIndex)
{
    Thread* next;    //intrusive linked list
    uint pc;
    uint counter;    //loop counter
    uint uopCounter; //counts micro operations inside one macro instruction (e.g. BackRef)
    Group!DataIndex[1] matches;
}

//head-tail singly-linked list
struct ThreadList(DataIndex)
{
    Thread!DataIndex* tip = null, toe = null;
    //add new thread to the start of list
    void insertFront(Thread!DataIndex* t)
    {
        if (tip)
        {
            t.next = tip;
            tip = t;
        }
        else
        {
            t.next = null;
            tip = toe = t;
        }
    }
    //add new thread to the end of list
    void insertBack(Thread!DataIndex* t)
    {
        if (toe)
        {
            toe.next = t;
            toe = t;
        }
        else
            tip = toe = t;
        toe.next = null;
    }
    //move head element out of list
    Thread!DataIndex* fetch()
    {
        auto t = tip;
        if (tip == toe)
            tip = toe = null;
        else
            tip = tip.next;
        return t;
    }
    //non-destructive iteration of ThreadList
    struct ThreadRange
    {
        const(Thread!DataIndex)* ct;
        this(ThreadList tlist){ ct = tlist.tip; }
        @property bool empty(){ return ct is null; }
        @property const(Thread!DataIndex)* front(){ return ct; }
        void popFront()
        {
            assert(ct);
            ct = ct.next;
        }
    }
    @property bool empty()
    {
        return tip == null;
    }
    ThreadRange opSlice()
    {
        return ThreadRange(this);
    }
}

template ThompsonOps(E, S, bool withInput:true)
{
@trusted:
    static bool op(IR code:IR.End)(E e, S* state)
    {
        with(e) with(state)
        {
            finish(t, matches, re.ir[t.pc].data);
            //fix endpoint of the whole match
            matches[0].end = index;
            recycle(t);
            //cut off low priority threads
            recycle(clist);
            recycle(worklist);
            debug(std_regex_matcher) writeln("Finished thread ", matches);
            return false; // no more state to eval
        }
    }

    static bool op(IR code:IR.Wordboundary)(E e, S* state)
    {
        with(e) with(state)
        {
            dchar back;
            DataIndex bi;
            //at start & end of input
            if (atStart && wordMatcher[front])
            {
                t.pc += IRL!(IR.Wordboundary);
                return true;
            }
            else if (atEnd && s.loopBack(index).nextChar(back, bi)
                    && wordMatcher[back])
            {
                t.pc += IRL!(IR.Wordboundary);
                return true;
            }
            else if (s.loopBack(index).nextChar(back, bi))
            {
                bool af = wordMatcher[front];
                bool ab = wordMatcher[back];
                if (af ^ ab)
                {
                    t.pc += IRL!(IR.Wordboundary);
                    return true;
                }
            }
            return popState(e);
        }
    }

    static bool op(IR code:IR.Notwordboundary)(E e, S* state)
    {
        with(e) with(state)
        {
            dchar back;
            DataIndex bi;
            //at start & end of input
            if (atStart && wordMatcher[front])
            {
                return popState(e);
            }
            else if (atEnd && s.loopBack(index).nextChar(back, bi)
                    && wordMatcher[back])
            {
                return popState(e);
            }
            else if (s.loopBack(index).nextChar(back, bi))
            {
                bool af = wordMatcher[front];
                bool ab = wordMatcher[back]  != 0;
                if (af ^ ab)
                {
                    return popState(e);
                }
            }
            t.pc += IRL!(IR.Notwordboundary);
        }
        return true;
    }

    static bool op(IR code:IR.Bof)(E e, S* state)
    {
        with(e) with(state)
        {
            if (atStart)
            {
                t.pc += IRL!(IR.Bof);
                return true;
            }
            else
            {
                return popState(e);
            }
        }
    }

    static bool op(IR code:IR.Bol)(E e, S* state)
    {
        with(e) with(state)
        {
            dchar back;
            DataIndex bi;
            if (atStart
                ||(s.loopBack(index).nextChar(back,bi)
                && startOfLine(back, front == '\n')))
            {
                t.pc += IRL!(IR.Bol);
                return true;
            }
            else
            {
                return popState(e);
            }
        }
    }

    static bool op(IR code:IR.Eof)(E e, S* state)
    {
        with(e) with(state)
        {
            if (atEnd)
            {
                t.pc += IRL!(IR.Eol);
                return true;
            }
            else
            {
                return popState(e);
            }
        }
    }

    static bool op(IR code:IR.Eol)(E e, S* state)
    {
        with(e) with(state)
        {
            dchar back;
            DataIndex bi;
            //no matching inside \r\n
            if (atEnd || (endOfLine(front, s.loopBack(index).nextChar(back, bi)
                    && back == '\r')))
            {
                t.pc += IRL!(IR.Eol);
                return true;
            }
            else
            {
                return popState(e);
            }

        }
    }

    static bool op(IR code:IR.InfiniteStart)(E e, S* state)
    {
        with(e) with(state)
            t.pc += re.ir[t.pc].data + IRL!(IR.InfiniteStart);
        return op!(IR.InfiniteEnd)(e,state);
    }

    static bool op(IR code:IR.InfiniteBloomStart)(E e, S* state)
    {
        with(e) with(state)
            t.pc += re.ir[t.pc].data + IRL!(IR.InfiniteBloomStart);
        return op!(IR.InfiniteBloomEnd)(e,state);
    }

    static bool op(IR code:IR.InfiniteQStart)(E e, S* state)
    {
        with(e) with(state)
            t.pc += re.ir[t.pc].data + IRL!(IR.InfiniteQStart);
        return op!(IR.InfiniteQEnd)(e,state);
    }

    static bool op(IR code:IR.RepeatStart)(E e, S* state)
    {
        with(e) with(state)
            t.pc += re.ir[t.pc].data + IRL!(IR.RepeatStart);
        return op!(IR.RepeatEnd)(e,state);
    }

    static bool op(IR code:IR.RepeatQStart)(E e, S* state)
    {
        with(e) with(state)
            t.pc += re.ir[t.pc].data + IRL!(IR.RepeatQStart);
        return op!(IR.RepeatQEnd)(e,state);
    }

    static bool op(IR code)(E e, S* state)
        if (code == IR.RepeatEnd || code == IR.RepeatQEnd)
    {
        with(e) with(state)
        {
            //len, step, min, max
                uint len = re.ir[t.pc].data;
                uint step =  re.ir[t.pc+2].raw;
                uint min = re.ir[t.pc+3].raw;
                if (t.counter < min)
                {
                    t.counter += step;
                    t.pc -= len;
                    return true;
                }
                if (merge[re.ir[t.pc + 1].raw+t.counter] < genCounter)
                {
                    debug(std_regex_matcher) writefln("A thread(pc=%s) passed there : %s ; GenCounter=%s mergetab=%s",
                                    t.pc, index, genCounter, merge[re.ir[t.pc + 1].raw+t.counter] );
                    merge[re.ir[t.pc + 1].raw+t.counter] = genCounter;
                }
                else
                {
                    debug(std_regex_matcher)
                        writefln("A thread(pc=%s) got merged there : %s ; GenCounter=%s mergetab=%s",
                            t.pc, index, genCounter, merge[re.ir[t.pc + 1].raw+t.counter] );
                    return popState(e);
                }
                uint max = re.ir[t.pc+4].raw;
                if (t.counter < max)
                {
                    if (re.ir[t.pc].code == IR.RepeatEnd)
                    {
                        //queue out-of-loop thread
                        worklist.insertFront(fork(t, t.pc + IRL!(IR.RepeatEnd),  t.counter % step));
                        t.counter += step;
                        t.pc -= len;
                    }
                    else
                    {
                        //queue into-loop thread
                        worklist.insertFront(fork(t, t.pc - len,  t.counter + step));
                        t.counter %= step;
                        t.pc += IRL!(IR.RepeatEnd);
                    }
                }
                else
                {
                    t.counter %= step;
                    t.pc += IRL!(IR.RepeatEnd);
                }
                return true;
        }
    }

    static bool op(IR code)(E e, S* state)
        if (code == IR.InfiniteEnd || code == IR.InfiniteQEnd)
    {
        with(e) with(state)
        {
            if (merge[re.ir[t.pc + 1].raw+t.counter] < genCounter)
            {
                debug(std_regex_matcher) writefln("A thread(pc=%s) passed there : %s ; GenCounter=%s mergetab=%s",
                                t.pc, index, genCounter, merge[re.ir[t.pc + 1].raw+t.counter] );
                merge[re.ir[t.pc + 1].raw+t.counter] = genCounter;
            }
            else
            {
                debug(std_regex_matcher) writefln("A thread(pc=%s) got merged there : %s ; GenCounter=%s mergetab=%s",
                                t.pc, index, genCounter, merge[re.ir[t.pc + 1].raw+t.counter] );
                return popState(e);
            }
            uint len = re.ir[t.pc].data;
            uint pc1, pc2; //branches to take in priority order
            if (re.ir[t.pc].code == IR.InfiniteEnd)
            {
                pc1 = t.pc - len;
                pc2 = t.pc + IRL!(IR.InfiniteEnd);
            }
            else
            {
                pc1 = t.pc + IRL!(IR.InfiniteEnd);
                pc2 = t.pc - len;
            }
            worklist.insertFront(fork(t, pc2, t.counter));
            t.pc = pc1;
            return true;
        }
    }

    static bool op(IR code)(E e, S* state)
        if (code == IR.InfiniteBloomEnd)
    {
        with(e) with(state)
        {
            if (merge[re.ir[t.pc + 1].raw+t.counter] < genCounter)
            {
                debug(std_regex_matcher) writefln("A thread(pc=%s) passed there : %s ; GenCounter=%s mergetab=%s",
                                t.pc, index, genCounter, merge[re.ir[t.pc + 1].raw+t.counter] );
                merge[re.ir[t.pc + 1].raw+t.counter] = genCounter;
            }
            else
            {
                debug(std_regex_matcher) writefln("A thread(pc=%s) got merged there : %s ; GenCounter=%s mergetab=%s",
                                t.pc, index, genCounter, merge[re.ir[t.pc + 1].raw+t.counter] );
                return popState(e);
            }
            uint len = re.ir[t.pc].data;
            uint pc1, pc2; //branches to take in priority order
            pc1 = t.pc - len;
            pc2 = t.pc + IRL!(IR.InfiniteBloomEnd);
            uint filterIndex = re.ir[t.pc + 2].raw;
            if (re.filters[filterIndex][front])
                worklist.insertFront(fork(t, pc2, t.counter));
            t.pc = pc1;
            return true;
        }
    }

    static bool op(IR code:IR.OrEnd)(E e, S* state)
    {
        with(e) with(state)
        {
            if (merge[re.ir[t.pc + 1].raw+t.counter] < genCounter)
            {
                debug(std_regex_matcher) writefln("A thread(pc=%s) passed there : %s ; GenCounter=%s mergetab=%s",
                                t.pc, s[index .. s.lastIndex], genCounter, merge[re.ir[t.pc + 1].raw + t.counter] );
                merge[re.ir[t.pc + 1].raw+t.counter] = genCounter;
                t.pc += IRL!(IR.OrEnd);
            }
            else
            {
                debug(std_regex_matcher) writefln("A thread(pc=%s) got merged there : %s ; GenCounter=%s mergetab=%s",
                                t.pc, s[index .. s.lastIndex], genCounter, merge[re.ir[t.pc + 1].raw + t.counter] );
                return popState(e);
            }
            return true;
        }
    }

    static bool op(IR code:IR.OrStart)(E e, S* state)
    {
        with(e) with(state)
        {
            t.pc += IRL!(IR.OrStart);
            return op!(IR.Option)(e,state);
        }
    }

    static bool op(IR code:IR.Option)(E e, S* state)
    {
        with(e) with(state)
        {
            uint next = t.pc + re.ir[t.pc].data + IRL!(IR.Option);
            //queue next Option
            if (re.ir[next].code == IR.Option)
            {
                worklist.insertFront(fork(t, next, t.counter));
            }
            t.pc += IRL!(IR.Option);
            return true;
        }
    }

    static bool op(IR code:IR.GotoEndOr)(E e, S* state)
    {
        with(e) with(state)
        {
            t.pc = t.pc + re.ir[t.pc].data + IRL!(IR.GotoEndOr);
            return op!(IR.OrEnd)(e, state);
        }
    }

    static bool op(IR code:IR.GroupStart)(E e, S* state)
    {
        with(e) with(state)
        {
            uint n = re.ir[t.pc].data;
            t.matches.ptr[n].begin = index;
            t.pc += IRL!(IR.GroupStart);
            return true;
        }
    }
    static bool op(IR code:IR.GroupEnd)(E e, S* state)
    {
        with(e) with(state)
        {
            uint n = re.ir[t.pc].data;
            t.matches.ptr[n].end = index;
            t.pc += IRL!(IR.GroupEnd);
            return true;
        }
    }

    static bool op(IR code:IR.Backref)(E e, S* state)
    {
        with(e) with(state)
        {
            uint n = re.ir[t.pc].data;
            Group!DataIndex* source = re.ir[t.pc].localRef ? t.matches.ptr : backrefed.ptr;
            assert(source);
            if (source[n].begin == source[n].end)//zero-width Backref!
            {
                t.pc += IRL!(IR.Backref);
                return true;
            }
            else
            {
                size_t idx = source[n].begin + t.uopCounter;
                size_t end = source[n].end;
                if (s[idx .. end].front == front)
                {
                    import std.utf : stride;

                    t.uopCounter += stride(s[idx .. end], 0);
                    if (t.uopCounter + source[n].begin == source[n].end)
                    {//last codepoint
                        t.pc += IRL!(IR.Backref);
                        t.uopCounter = 0;
                    }
                    nlist.insertBack(t);
                }
                else
                    recycle(t);
                t = worklist.fetch();
                return t != null;
            }
        }
    }


    static bool op(IR code)(E e, S* state)
        if (code == IR.LookbehindStart || code == IR.NeglookbehindStart)
    {
        with(e) with(state)
        {
            uint len = re.ir[t.pc].data;
            uint ms = re.ir[t.pc + 1].raw, me = re.ir[t.pc + 2].raw;
            uint end = t.pc + len + IRL!(IR.LookbehindEnd) + IRL!(IR.LookbehindStart);
            bool positive = re.ir[t.pc].code == IR.LookbehindStart;
            static if (Stream.isLoopback)
                auto matcher = fwdMatcher(t.pc, end, me - ms, subCounters.get(t.pc, 0));
            else
                auto matcher = bwdMatcher(t.pc, end, me - ms, subCounters.get(t.pc, 0));
            matcher.backrefed = backrefed.empty ? t.matches : backrefed;
            //backMatch
            auto mRes = matcher.matchOneShot(t.matches.ptr[ms .. me], IRL!(IR.LookbehindStart));
            freelist = matcher.freelist;
            subCounters[t.pc] = matcher.genCounter;
            if ((mRes != 0 ) ^ positive)
            {
                return popState(e);
            }
            t.pc = end;
            return true;
        }
    }

    static bool op(IR code)(E e, S* state)
        if (code == IR.LookaheadStart || code == IR.NeglookaheadStart)
    {
        with(e) with(state)
        {
            auto save = index;
            uint len = re.ir[t.pc].data;
            uint ms = re.ir[t.pc+1].raw, me = re.ir[t.pc+2].raw;
            uint end = t.pc+len+IRL!(IR.LookaheadEnd)+IRL!(IR.LookaheadStart);
            bool positive = re.ir[t.pc].code == IR.LookaheadStart;
            static if (Stream.isLoopback)
                auto matcher = bwdMatcher(t.pc, end, me - ms, subCounters.get(t.pc, 0));
            else
                auto matcher = fwdMatcher(t.pc, end, me - ms, subCounters.get(t.pc, 0));
            matcher.backrefed = backrefed.empty ? t.matches : backrefed;
            auto mRes = matcher.matchOneShot(t.matches.ptr[ms .. me], IRL!(IR.LookaheadStart));
            freelist = matcher.freelist;
            subCounters[t.pc] = matcher.genCounter;
            s.reset(index);
            next();
            if ((mRes != 0) ^ positive)
            {
                return popState(e);
            }
            t.pc = end;
            return true;
        }
    }

    static bool op(IR code)(E e, S* state)
        if (code == IR.LookaheadEnd || code == IR.NeglookaheadEnd ||
            code == IR.LookbehindEnd || code == IR.NeglookbehindEnd)
    {
        with(e) with(state)
        {
                finish(t, matches.ptr[0 .. re.ngroup], re.ir[t.pc].data);
                recycle(t);
                //cut off low priority threads
                recycle(clist);
                recycle(worklist);
                return false; // no more state
        }
    }

    static bool op(IR code:IR.Nop)(E e, S* state)
    {
        with(state) t.pc += IRL!(IR.Nop);
        return true;
    }

    static bool op(IR code:IR.OrChar)(E e, S* state)
    {
        with(e) with(state)
        {
            uint len = re.ir[t.pc].sequence;
            uint end = t.pc + len;
            static assert(IRL!(IR.OrChar) == 1);
            for (; t.pc < end; t.pc++)
                if (re.ir[t.pc].data == front)
                    break;
            if (t.pc != end)
            {
                t.pc = end;
                nlist.insertBack(t);
            }
            else
                recycle(t);
            t = worklist.fetch();
            return t != null;
        }
    }

    static bool op(IR code:IR.Char)(E e, S* state)
    {
        with(e) with(state)
        {
            if (front == re.ir[t.pc].data)
            {
                t.pc += IRL!(IR.Char);
                nlist.insertBack(t);
            }
            else
                recycle(t);
            t = worklist.fetch();
            return t != null;
        }
    }

    static bool op(IR code:IR.Any)(E e, S* state)
    {
        with(e) with(state)
        {
            t.pc += IRL!(IR.Any);
            nlist.insertBack(t);
            t = worklist.fetch();
            return t != null;
        }
    }

    static bool op(IR code:IR.CodepointSet)(E e, S* state)
    {
        with(e) with(state)
        {
            if (re.charsets[re.ir[t.pc].data].scanFor(front))
            {
                t.pc += IRL!(IR.CodepointSet);
                nlist.insertBack(t);
            }
            else
            {
                recycle(t);
            }
            t = worklist.fetch();
            return t != null;
        }
    }

    static bool op(IR code:IR.Trie)(E e, S* state)
    {
        with(e) with(state)
        {
            if (re.matchers[re.ir[t.pc].data][front])
            {
                t.pc += IRL!(IR.Trie);
                nlist.insertBack(t);
            }
            else
            {
                recycle(t);
            }
            t = worklist.fetch();
            return t != null;
        }
    }

}

template ThompsonOps(E,S, bool withInput:false)
{
@trusted:
    // can't match these without input
    static bool op(IR code)(E e, S* state)
        if (code == IR.Char || code == IR.OrChar || code == IR.CodepointSet
        || code == IR.Trie || code == IR.Char || code == IR.Any)
    {
        return state.popState(e);
    }

    // special case of zero-width backref
    static bool op(IR code:IR.Backref)(E e, S* state)
    {
        with(e) with(state)
        {
            uint n = re.ir[t.pc].data;
            Group!DataIndex* source = re.ir[t.pc].localRef ? t.matches.ptr : backrefed.ptr;
            assert(source);
            if (source[n].begin == source[n].end)//zero-width Backref!
            {
                t.pc += IRL!(IR.Backref);
                return true;
            }
            else
                return popState(e);
        }
    }

    // forward all control flow to normal versions
    static bool op(IR code)(E e, S* state)
        if (code != IR.Char && code != IR.OrChar && code != IR.CodepointSet
        && code != IR.Trie && code != IR.Char && code != IR.Any && code != IR.Backref)
    {
        return ThompsonOps!(E,S,true).op!code(e,state);
    }
}

/+
   Thomspon matcher does all matching in lockstep,
   never looking at the same char twice
+/
@trusted class ThompsonMatcher(Char, StreamType = Input!Char): Matcher!Char
if (is(Char : dchar))
{
    alias DataIndex = Stream.DataIndex;
    alias Stream = StreamType;
    alias OpFunc = bool function(ThompsonMatcher, State*) pure;
    alias BackMatcher = ThompsonMatcher!(Char, BackLooper!(Stream));
    alias OpBackFunc = bool function(BackMatcher, BackMatcher.State*) pure;
    Thread!DataIndex* freelist;
    ThreadList!DataIndex clist, nlist;
    DataIndex[] merge;
    Group!DataIndex[] backrefed;
    const Regex!Char re;           //regex program
    Stream s;
    dchar front;
    DataIndex index;
    DataIndex genCounter;    //merge trace counter, goes up on every dchar
    size_t[size_t] subCounters; //a table of gen counter per sub-engine: PC -> counter
    OpFunc[] opCacheTrue;   // pointers to Op!(IR.xyz) for each bytecode
    OpFunc[] opCacheFalse;  // ditto
    OpBackFunc[] opCacheBackTrue;   // ditto
    OpBackFunc[] opCacheBackFalse;  // ditto
    size_t threadSize;
    size_t _refCount;
    int matched;
    bool exhausted;

final:
    pure
    static struct State
    {
        Thread!DataIndex* t;
        ThreadList!DataIndex worklist;
        Group!DataIndex[] matches;

        bool popState(E)(E e)
        {
            with(e)
            {
                recycle(t);
                t = worklist.fetch();
                return t != null;
            }
        }

    }

    static if (__traits(hasMember,Stream, "search"))
    {
        enum kicked = true;
    }
    else
        enum kicked = false;

    static size_t getThreadSize(const ref Regex!Char re)
    {
        return re.ngroup
            ? (Thread!DataIndex).sizeof + (re.ngroup-1)*(Group!DataIndex).sizeof
            : (Thread!DataIndex).sizeof - (Group!DataIndex).sizeof;
    }

    static size_t initialMemory(const ref Regex!Char re)
    {
        return getThreadSize(re)*re.threadCount + re.hotspotTableSize*size_t.sizeof
            +4*OpFunc.sizeof*re.ir.length;
    }

    //true if it's start of input
    @property bool atStart(){   return index == 0; }

    //true if it's end of input
    @property bool atEnd(){  return index == s.lastIndex && s.atEnd; }

    override @property ref size_t refCount() @safe { return _refCount; }

    override @property ref const(Regex!Char) pattern() @safe { return re; }

    bool next()
    {
        if (!s.nextChar(front, index))
        {
            index =  s.lastIndex;
            return false;
        }
        return true;
    }

    static if (kicked)
    {
        bool search()
        {

            if (!s.search(re.kickstart, front, index))
            {
                index = s.lastIndex;
                return false;
            }
            return true;
        }
    }

    void initExternalMemory(void[] memory)
    {
        threadSize = getThreadSize(re);
        prepareFreeList(re.threadCount, memory);
        if (re.hotspotTableSize)
        {
            merge = arrayInChunk!(DataIndex)(re.hotspotTableSize, memory);
            merge[] = 0;
        }
        opCacheTrue = arrayInChunk!(OpFunc)(re.ir.length, memory);
        opCacheFalse = arrayInChunk!(OpFunc)(re.ir.length, memory);
        opCacheBackTrue = arrayInChunk!(OpBackFunc)(re.ir.length, memory);
        opCacheBackFalse = arrayInChunk!(OpBackFunc)(re.ir.length, memory);

        for (uint pc = 0; pc<re.ir.length; pc += re.ir[pc].length)
        {
        L_dispatch:
            switch (re.ir[pc].code)
            {
                foreach (e; __traits(allMembers, IR))
                {
            mixin(`case IR.`~e~`:
                    opCacheTrue[pc] = &Ops!(true).op!(IR.`~e~`);
                    opCacheBackTrue[pc] = &BackOps!(true).op!(IR.`~e~`);
                    opCacheFalse[pc] = &Ops!(false).op!(IR.`~e~`);
                    opCacheBackFalse[pc] = &BackOps!(false).op!(IR.`~e~`);
                break L_dispatch;
                `);
                }
            default:
                assert(0, "Unrecognized instruction "~re.ir[pc].mnemonic);
            }
        }
    }

    override Matcher!Char rearm(in Char[] data)
    {
        exhausted = false;
        matched = 0;
        s = Stream(data);
        return this;
    }

    this()(ref const Regex!Char program, Stream stream, void[] memory)
    {
         // We are emplace'd to malloced memory w/o blitting T.init over it\
         // make sure we initialize all fields explicitly
        _refCount = 1;
        subCounters = null;
        backrefed = null;
        exhausted = false;
        matched = 0;
        re = program;
        s = stream;
        initExternalMemory(memory);
        genCounter = 0;
    }

    this(ThompsonMatcher matcher, size_t lo, size_t hi, uint nGroup, Stream stream)
    {
        _refCount = 1;
        subCounters = matcher.subCounters;
        s = stream;
        auto code = matcher.re.ir[lo .. hi];
        re = matcher.re.withCode(code).withNGroup(nGroup);
        threadSize = matcher.threadSize;
        merge = matcher.merge;
        freelist = matcher.freelist;
        opCacheTrue = matcher.opCacheTrue[lo .. hi];
        opCacheBackTrue = matcher.opCacheBackTrue[lo .. hi];
        opCacheFalse = matcher.opCacheFalse[lo .. hi];
        opCacheBackFalse = matcher.opCacheBackFalse[lo .. hi];
        front = matcher.front;
        index = matcher.index;
    }

    this(BackMatcher matcher, size_t lo, size_t hi, uint nGroup, Stream stream)
    {
        _refCount = 1;
        subCounters = matcher.subCounters;
        s = stream;
        auto code = matcher.re.ir[lo .. hi];
        re = matcher.re.withCode(code).withNGroup(nGroup);
        threadSize = matcher.threadSize;
        merge = matcher.merge;
        freelist = matcher.freelist;
        opCacheTrue = matcher.opCacheBackTrue[lo .. hi];
        opCacheBackTrue = matcher.opCacheTrue[lo .. hi];
        opCacheFalse = matcher.opCacheBackFalse[lo .. hi];
        opCacheBackFalse = matcher.opCacheFalse[lo .. hi];
        front = matcher.front;
        index = matcher.index;
    }

    auto fwdMatcher()(size_t lo, size_t hi, uint nGroup, size_t counter)
    {
        auto m = new ThompsonMatcher!(Char, Stream)(this, lo, hi, nGroup, s);
        m.genCounter = counter;
        return m;
    }

    auto bwdMatcher()(size_t lo, size_t hi, uint nGroup, size_t counter)
    {
        alias BackLooper = typeof(s.loopBack(index));
        auto m = new ThompsonMatcher!(Char, BackLooper)(this, lo, hi, nGroup, s.loopBack(index));
        m.genCounter = counter;
        m.next();
        return m;
    }

    override void dupTo(Matcher!Char engine, void[] memory)
    {
        auto thompson = cast(ThompsonMatcher) engine;
        thompson.s = s;
        thompson.subCounters = null;
        thompson.front = front;
        thompson.index = index;
        thompson.matched = matched;
        thompson.exhausted = exhausted;
        thompson.initExternalMemory(memory);
    }

    override int match(Group!DataIndex[] matches)
    {
        debug(std_regex_matcher)
            writeln("------------------------------------------");
        if (exhausted)
        {
            return false;
        }
        if (re.flags & RegexInfo.oneShot)
        {
            next();
            exhausted = true;
            return matchOneShot(matches);
        }
        static if (kicked)
            if (!re.kickstart.empty)
                return matchImpl!(true)(matches);
        return matchImpl!(false)(matches);
    }

    //match the input and fill matches
    int matchImpl(bool withSearch)(Group!DataIndex[] matches)
    {
        if (!matched && clist.empty)
        {
           static if (withSearch)
                search();
           else
                next();
        }
        else//char in question is  fetched in prev call to match
        {
            matched = 0;
        }
        State state;
        state.matches = matches;

        if (!atEnd)//if no char
            for (;;)
            {
                genCounter++;
                debug(std_regex_matcher)
                {
                    writefln("Threaded matching threads at  %s", s[index .. s.lastIndex]);
                    foreach (t; clist[])
                    {
                        assert(t);
                        writef("pc=%s ",t.pc);
                        write(t.matches);
                        writeln();
                    }
                }
                for (state.t = clist.fetch(); state.t; state.t = clist.fetch())
                {
                    eval!true(&state);
                }
                //if we already have match no need to push the engine
                if (!matched)
                {
                    state.t = createStart(index);
                    eval!true(&state);//new thread staring at this position
                }
                else if (nlist.empty)
                {
                    debug(std_regex_matcher) writeln("Stopped  matching before consuming full input");
                    break;//not a partial match for sure
                }
                clist = nlist;
                nlist = (ThreadList!DataIndex).init;
                if (clist.tip is null)
                {
                    static if (withSearch)
                    {
                        if (!search())
                            break;
                    }
                    else
                    {
                        if (!next())
                            break;
                    }
                }
                else if (!next())
                {
                    if (!atEnd) return false;
                    exhausted = true;
                    break;
                }
            }

        genCounter++; //increment also on each end
        debug(std_regex_matcher) writefln("Threaded matching threads at end");
        //try out all zero-width posibilities
        for (state.t = clist.fetch(); state.t; state.t = clist.fetch())
        {
            eval!false(&state);
        }
        if (!matched)
        {
            state.t = createStart(index);
            eval!false(&state);//new thread starting at end of input
        }
        if (matched)
        {//in case NFA found match along the way
         //and last possible longer alternative ultimately failed
            s.reset(matches[0].end);//reset to last successful match
            next();//and reload front character
            //--- here the exact state of stream was restored ---
            exhausted = atEnd || !(re.flags & RegexOption.global);
            //+ empty match advances the input
            if (!exhausted && matches[0].begin == matches[0].end)
                next();
        }
        return matched;
    }

    /+
        handle succesful threads
    +/
    void finish(const(Thread!DataIndex)* t, Group!DataIndex[] matches, int code)
    {
        matches.ptr[0 .. re.ngroup] = t.matches.ptr[0 .. re.ngroup];
        debug(std_regex_matcher)
        {
            writef("FOUND pc=%s prog_len=%s",
                    t.pc, re.ir.length);
            if (!matches.empty)
                writefln(": %s..%s", matches[0].begin, matches[0].end);
            foreach (v; matches)
                writefln("%d .. %d", v.begin, v.end);
        }
        matched = code;
    }

    alias Ops(bool withInput) =  ThompsonOps!(ThompsonMatcher, State, withInput);
    alias BackOps(bool withInput) =  ThompsonOps!(BackMatcher, BackMatcher.State, withInput);

    /+
        match thread against codepoint, cutting trough all 0-width instructions
        and taking care of control flow, then add it to nlist
    +/
    void eval(bool withInput)(State* state)
    {
        debug(std_regex_matcher) writeln("---- Evaluating thread");
        static if (withInput)
            while (opCacheTrue.ptr[state.t.pc](this, state)){}
        else
            while (opCacheFalse.ptr[state.t.pc](this, state)){}
    }
    enum uint RestartPc = uint.max;
    //match the input, evaluating IR without searching
    int matchOneShot(Group!DataIndex[] matches, uint startPc = 0)
    {
        debug(std_regex_matcher)
        {
            writefln("---------------single shot match ----------------- ");
        }
        alias evalFn = eval;
        assert(clist == (ThreadList!DataIndex).init || startPc == RestartPc); // incorrect after a partial match
        assert(nlist == (ThreadList!DataIndex).init || startPc == RestartPc);
        State state;
        state.matches = matches;
        if (!atEnd)//if no char
        {
            debug(std_regex_matcher)
            {
                writefln("-- Threaded matching threads at  %s",  s[index .. s.lastIndex]);
            }
            if (startPc != RestartPc)
            {
                state.t = createStart(index, startPc);
                genCounter++;
                evalFn!true(&state);
            }
            for (;;)
            {
                debug(std_regex_matcher) writeln("\n-- Started iteration of main cycle");
                genCounter++;
                debug(std_regex_matcher)
                {
                    foreach (t; clist[])
                    {
                        assert(t);
                    }
                }
                for (state.t = clist.fetch(); state.t; state.t = clist.fetch())
                {
                    evalFn!true(&state);
                }
                if (nlist.empty)
                {
                    debug(std_regex_matcher) writeln("Stopped  matching before consuming full input");
                    break;//not a partial match for sure
                }
                clist = nlist;
                nlist = (ThreadList!DataIndex).init;
                if (!next())
                    break;
                debug(std_regex_matcher) writeln("-- Ended iteration of main cycle\n");
            }
        }
        genCounter++; //increment also on each end
        debug(std_regex_matcher) writefln("-- Matching threads at end");
        //try out all zero-width posibilities
        for (state.t = clist.fetch(); state.t; state.t = clist.fetch())
        {
            evalFn!false(&state);
        }
        if (!matched)
        {
            state.t = createStart(index, startPc);
            evalFn!false(&state);
        }
        return matched;
    }

    //get a dirty recycled Thread
    Thread!DataIndex* allocate()
    {
        assert(freelist, "not enough preallocated memory");
        Thread!DataIndex* t = freelist;
        freelist = freelist.next;
        return t;
    }

    //link memory into a free list of Threads
    void prepareFreeList(size_t size, ref void[] memory)
    {
        void[] mem = memory[0 .. threadSize*size];
        memory = memory[threadSize * size .. $];
        freelist = cast(Thread!DataIndex*)&mem[0];
        size_t i;
        for (i = threadSize; i < threadSize*size; i += threadSize)
            (cast(Thread!DataIndex*)&mem[i-threadSize]).next = cast(Thread!DataIndex*)&mem[i];
        (cast(Thread!DataIndex*)&mem[i-threadSize]).next = null;
    }

    //dispose a thread
    void recycle(Thread!DataIndex* t)
    {
        t.next = freelist;
        freelist = t;
    }

    //dispose list of threads
    void recycle(ref ThreadList!DataIndex list)
    {
        if (list.tip)
        {
            // just put this head-tail list in front of freelist
            list.toe.next = freelist;
            freelist = list.tip;
            list = list.init;
        }
    }

    //creates a copy of master thread with given pc
    Thread!DataIndex* fork(Thread!DataIndex* master, uint pc, uint counter)
    {
        auto t = allocate();
        t.matches.ptr[0 .. re.ngroup] = master.matches.ptr[0 .. re.ngroup];
        t.pc = pc;
        t.counter = counter;
        t.uopCounter = 0;
        return t;
    }

    //creates a start thread
    Thread!DataIndex* createStart(DataIndex index, uint pc = 0)
    {
        auto t = allocate();
        t.matches.ptr[0 .. re.ngroup] = (Group!DataIndex).init;
        t.matches[0].begin = index;
        t.pc = pc;
        t.counter = 0;
        t.uopCounter = 0;
        return t;
    }
}