Кодирование первого и последующего наборов в CFG - PullRequest
0 голосов
/ 23 октября 2018
COMPUTING First[A]
------------------

// Procedure computeFirst
//
// Input: productions P_1, P_2, ..., P_m where P_i = A::=X_1 X_2 X_3 ...X_n
//    with no alternations allowed in the productions.
//
// Output: Computes first of a term or nonterm accounting for nullability
// and multiple productions for the same nonterm.
//
//  First is an array indexed by a terminal or
//  nonterminal and its value is a set of terminals and/or \epsilon.
//
// First[A] for nonterminal A is the set of all possible tokens that 
//    can occur as the first token of a sentence derived from A.
// First[A] for terminal A is simply the set { A }.
//
// Compute the first sets for all tokens from productions P_1, P_2, ..., P_m
//  where no production contains an alternation
//
// CALLS: computeFirstOfList(X_1, X_2, ... X_n)
procedure computeFirst({P_1, P_2, ...P_m})  // works on a list of productions
    // initial value for the First of anything
    foreach A \elemof TERMS do First[A] = {A} 
    foreach A \elemof NONTERMS do First[A] = {} 

    // loop until nothing new happens updating the First sets
    while stillchanging any First[A] do
        foreach production P_i = A::=X_1, X_2, ... X_n do
             First[A] <- First[A] \union computeFirstOfList(X_1, X_2, ... X_n)
        end foreach
    end while
end

// Procedure computeFirstOfList
//
// Computes the First of a rhs rather than just a token!
//
// This computes the set of tokens that can occur as the first
// token of a sentence derived from this rhs (right hand side) of
// of production.  That is X_1, X_2, ... X_n is a concatenation of
// terminals and nonterminals often found on the right hand side
// of a production.  This is nontrivial because some of the leading
// nonterminals on the rhs can go to epsilon.
//
// REFS: First[X_i]  (does not use Follow)
//
procedure computeFirstOfList(X_1, X_2, ... X_n)
    Tmp = {}
    k=0
    do
          k++
          Tmp <- Tmp \union First[X_k]-{\epsilon}
    while k<n & \epsilon isin First[X_k]

    // \epsilon only if X_1, X_2, ... X_n -> \epsilon
    // Note: this test can only possibly work if k==n:
    if \epsilon isin First[X_k] then Tmp <- Tmp \union {\epsilon}

    return Tmp
end

Note:

1. IMPORTANT: if grammar has no \epsilon then the procedure
   computeFirstOfList(X_1, X_2, ... X_n) simply returns First[X_1] 

2. since \epsilon is removed when adding to First inside the do/while
\epsilon can only appear when the entire argument list can be
replaced by \epsilon (called this production is called NULLABLE). 

3. First Sets can contain \epsilon as an element.  Follow Sets cannot as we'll see.

4. Conceptually, computeFirst generates a relation of the form:
First[A] = First[\alpha] \union First[\beta] \union ... \union { \epsilon },
for each production where A occurs on the left hand side (lhs).
This is based on the next point.

5. Conceptually, computeFirstOfList generates a relation of the form:
computeFirstOfList(X_1, X_2, ... X_n) = First[\alpha] \union First[\beta]
\union ... \union { \epsilon } where terms are added based on if
all of the terms before it in the rhs are nullable.

Я пытаюсь создать этот псевдокод на python.Это касается первых / последующих наборов в контекстно-свободной грамматике.Проблема, с которой я сталкиваюсь, заключается в том, что в computeFirst говорится, что цикл продолжается для каждого производства, что заставляет меня предположить, что в computeFirstOfList будет отправлено одно производство.Но в этой функции выглядит, как будто она проходит через несколько функций при k

Tmp <- Tmp \union First[X_k]-{\epsilon}

Означает ли эта строка

union(tmp,First[k]) 

или

union(First[k],epsilon)

Я действительно не понимаю.Я все еще должен заставить профсоюз функционировать, и я надеялся получить какое-то руководство.Вот мой код

NONTERMS = ['S','E','I','A','T','O','M','F']
TERMS = ['+','-','*','(',')','n','e']
epsilon='e'
FIRST ={}
grammar = {'E':[['T','I'],[epsilon]],
           'I':[['A','T','I'],[epsilon]],
           'A':[['+'],['-']],
           'T':[['F','O']],
           'O':[['M','F','O'],[epsilon]],
           'M':[['*']],
           'F':[['(','E',')'],[epsilon]]}


def computeFirst(g):
    for a in TERMS:
        FIRST[a]=set(a)

    for i in NONTERMS:
        FIRST[i]=set()

    for times in g:
        for key in g:
            Updating= True
            while Updating:
                tmp=FIRST[key].copy()
                for production in g[key]:
                    z=computeFirstOfList(production)
                    FIRST[key].update(FIRST[key].union(z))
                if tmp==FIRST[key]:
                    Updating=False

def computeFirstOfList(p):

    tmp=set()
    k=0
    n=len(p)
    while k<n:
        if(p[k]) in NONTERMS:
            tmp.update(tmp.union(FIRST[p[k]].difference(set(epsilon))))
        k+=1

    return tmp

computeFirst(grammar)
print(FIRST)

1 Ответ

0 голосов
/ 23 октября 2018

Tmp <- Tmp \union First[X_k]-{\epsilon}

Это означает

tmp = set()
tmp = tmp.union(first[x[k]].difference({epsilon})) # first should be dict of set

Я предлагаю:

def computeFirst(g):
    for a in TERMS:
        FIRST[a]={a}

    for i in NONTERMS:
        FIRST[i]=set()
...