用Python实现词法分析器（LexicalAnalyzer）-创新互联

　　from __future__ import print_function

拥有10多年行业经验，坚持技术领先、服务至上的经营模式，全靠网络和口碑获得客户；为自己降低成本，也就是为客户降低成本。目前业务范围包括：成都做网站、网站设计、成都网站推广、成都网站优化、整体网络托管、微信平台小程序开发、微信开发、重庆APP软件开发，同时也可以让客户的网站和网络营销像我们一样获得订单和生意！

　　import sys

# Token type ids and their printable names.
# The following two must remain in the same order: all_syms[tk_X] is the
# name printed for token type tk_X.
(tk_EOI, tk_Mul, tk_Div, tk_Mod, tk_Add, tk_Sub, tk_Negate, tk_Not,
 tk_Lss, tk_Leq, tk_Gtr, tk_Geq, tk_Eq, tk_Neq, tk_Assign, tk_And,
 tk_Or, tk_If, tk_Else, tk_While, tk_Print, tk_Putc, tk_Lparen,
 tk_Rparen, tk_Lbrace, tk_Rbrace, tk_Semi, tk_Comma, tk_Ident,
 tk_Integer, tk_String) = range(31)

all_syms = [
    "End_of_input", "Op_multiply", "Op_divide", "Op_mod", "Op_add",
    "Op_subtract", "Op_negate", "Op_not", "Op_less", "Op_lessequal",
    "Op_greater", "Op_greaterequal", "Op_equal", "Op_notequal",
    "Op_assign", "Op_and", "Op_or", "Keyword_if", "Keyword_else",
    "Keyword_while", "Keyword_print", "Keyword_putc", "LeftParen",
    "RightParen", "LeftBrace", "RightBrace", "Semicolon", "Comma",
    "Identifier", "Integer", "String",
]

# single character only symbols
symbols = {
    '{': tk_Lbrace,
    '}': tk_Rbrace,
    '(': tk_Lparen,
    ')': tk_Rparen,
    '+': tk_Add,
    '-': tk_Sub,
    '*': tk_Mul,
    '%': tk_Mod,
    ';': tk_Semi,
    ',': tk_Comma,
}

# reserved words, mapped to their token type
key_words = {
    'if': tk_If,
    'else': tk_Else,
    'print': tk_Print,
    'putc': tk_Putc,
    'while': tk_While,
}

# Lexer state shared by the scanning functions.
the_ch = " "        # dummy first char - but it must be a space
the_col = 0         # column of the_ch on the current line
the_line = 1        # current line number
input_file = None   # file object the characters are read from

#*** show error and exit
def error(line, col, msg):
    """Print ``line``, ``col`` and ``msg`` on one line, then exit.

    Raises:
        SystemExit: always, with exit status 1.
    """
    print(line, col, msg)
    # Use sys.exit rather than the bare exit() helper: exit() is injected by
    # the site module and is not guaranteed to exist (e.g. ``python -S``).
    sys.exit(1)

#*** get the next character from the input
def next_ch():
    """Read one character from ``input_file`` into ``the_ch`` and return it.

    Updates the global position: ``the_col`` is incremented for every read;
    on a newline ``the_line`` is incremented and ``the_col`` is reset to 0.
    At end of input ``read(1)`` yields the empty string, which is what is
    stored and returned.
    """
    global the_ch, the_col, the_line

    the_ch = input_file.read(1)
    the_col += 1
    if the_ch == '\n':
        the_line += 1
        the_col = 0
    return the_ch

#*** 'x' - character constants
def char_lit(err_line, err_col):
    """Scan a character constant; ``the_ch`` is the opening quote on entry.

    Recognised escapes are ``\\n`` (value 10) and ``\\\\`` (backslash).
    Problems are reported through ``error`` with the given position.

    Returns:
        ``(tk_Integer, err_line, err_col, n)`` where ``n`` is the character
        code of the constant.
    """
    ch = next_ch()                  # skip opening quote
    if len(ch) == 0:
        # ord('') would raise TypeError; report the truncated input instead.
        error(err_line, err_col, "EOF in character constant")
    n = ord(ch)
    if the_ch == '\'':
        error(err_line, err_col, "empty character constant")
    elif the_ch == '\\':
        next_ch()
        if len(the_ch) == 0:
            # "%c" % '' would raise TypeError in the message below.
            error(err_line, err_col, "EOF in character constant")
        if the_ch == 'n':
            n = 10
        elif the_ch == '\\':
            n = ord('\\')
        else:
            error(err_line, err_col, "unknown escape sequence \\%c" % (the_ch))
    if next_ch() != '\'':
        error(err_line, err_col, "multi-character constant")
    next_ch()                       # move past the closing quote
    return tk_Integer, err_line, err_col, n

#*** process divide or comments
def div_or_cmt(err_line, err_col):
    """Handle a '/': either the divide operator or the start of a comment.

    ``the_ch`` is the '/' on entry. If the next character is not '*', a
    ``(tk_Div, err_line, err_col)`` token is returned. Otherwise the comment
    is skipped up to and including the closing ``*/`` and the token that
    follows it is returned via ``gettok()``. End of input inside the comment
    is reported through ``error``.
    """
    if next_ch() != '*':
        return tk_Div, err_line, err_col

    # comment found
    next_ch()
    while True:
        if the_ch == '*':
            # Do not advance when the '*' is not followed by '/': the char
            # just read may itself be a '*' that starts the terminator.
            if next_ch() == '/':
                next_ch()
                return gettok()
        elif len(the_ch) == 0:
            error(err_line, err_col, "EOF in comment")
        else:
            next_ch()

#*** "string"
def string_lit(start, err_line, err_col):
    """Scan a string literal delimited by the character ``start``.

    ``the_ch`` is the opening delimiter on entry. Characters are collected
    verbatim until the closing delimiter; end of input or a newline before
    it is reported through ``error``.

    Returns:
        ``(tk_String, err_line, err_col, text)`` with ``text`` being the
        characters between the delimiters.
    """
    chars = []
    while next_ch() != start:
        if len(the_ch) == 0:
            error(err_line, err_col, "EOF while scanning string literal")
        if the_ch == '\n':
            error(err_line, err_col, "EOL while scanning string literal")
        chars.append(the_ch)

    next_ch()                       # move past the closing delimiter
    return tk_String, err_line, err_col, "".join(chars)

#*** handle identifiers and integers
def ident_or_int(err_line, err_col):
    """Scan a run of alphanumerics/underscores starting at ``the_ch``.

    Returns:
        ``(tk_Integer, err_line, err_col, n)`` when the run starts with a
        digit and consists of digits only,
        ``(keyword_token, err_line, err_col)`` when the run is in
        ``key_words``, otherwise ``(tk_Ident, err_line, err_col, text)``.

    An empty run (unrecognized character) or a run that starts with a digit
    but is not a valid number is reported through ``error``.
    """
    is_number = True
    chars = []
    while the_ch.isalnum() or the_ch == '_':
        chars.append(the_ch)
        if not the_ch.isdigit():
            is_number = False
        next_ch()
    text = "".join(chars)

    if len(text) == 0:
        error(err_line, err_col, "ident_or_int: unrecognized character: (%d) '%c'" % (ord(the_ch), the_ch))

    if text[0].isdigit():
        if not is_number:
            error(err_line, err_col, "invalid number: %s" % (text))
        try:
            return tk_Integer, err_line, err_col, int(text)
        except ValueError:
            # str.isdigit() accepts some characters (e.g. superscript
            # digits) that int() rejects; report them instead of crashing.
            error(err_line, err_col, "invalid number: %s" % (text))

    if text in key_words:
        return key_words[text], err_line, err_col

    return tk_Ident, err_line, err_col, text

#*** look ahead for '>=', etc.
def follow(expect, ifyes, ifno, err_line, err_col):
    """Choose between a two-character and a one-character operator.

    Reads the next character. If it equals ``expect`` it is consumed and
    ``ifyes`` is the token; otherwise ``ifno`` is the token and the character
    just read stays in ``the_ch``. Passing ``tk_EOI`` as ``ifno`` means the
    single-character form is not a valid token, which is reported through
    ``error``.

    Returns:
        ``(token, err_line, err_col)``.
    """
    if next_ch() == expect:
        next_ch()
        return ifyes, err_line, err_col

    if ifno == tk_EOI:
        error(err_line, err_col, "follow: unrecognized character: (%d) '%c'" % (ord(the_ch), the_ch))

    return ifno, err_line, err_col

#*** return the next token type
def gettok():
    """Skip whitespace and scan the next token starting at ``the_ch``.

    Returns:
        A tuple ``(token, line, col)``; integer, identifier and string
        tokens carry their value as a fourth element. ``line`` and ``col``
        are the position of the token's first character. ``tk_EOI`` is
        returned at end of input.
    """
    while the_ch.isspace():
        next_ch()

    err_line = the_line
    err_col = the_col

    if len(the_ch) == 0:
        return tk_EOI, err_line, err_col
    elif the_ch == '/':
        return div_or_cmt(err_line, err_col)
    elif the_ch == '\'':
        return char_lit(err_line, err_col)
    elif the_ch == '<':
        return follow('=', tk_Leq, tk_Lss, err_line, err_col)
    elif the_ch == '>':
        return follow('=', tk_Geq, tk_Gtr, err_line, err_col)
    elif the_ch == '=':
        return follow('=', tk_Eq, tk_Assign, err_line, err_col)
    elif the_ch == '!':
        return follow('=', tk_Neq, tk_Not, err_line, err_col)
    elif the_ch == '&':
        # a lone '&' is not a token: tk_EOI makes follow() report an error
        return follow('&', tk_And, tk_EOI, err_line, err_col)
    elif the_ch == '|':
        # a lone '|' is not a token: tk_EOI makes follow() report an error
        return follow('|', tk_Or, tk_EOI, err_line, err_col)
    elif the_ch == '"':
        return string_lit(the_ch, err_line, err_col)
    elif the_ch in symbols:
        sym = symbols[the_ch]
        next_ch()
        return sym, err_line, err_col
    else:
        return ident_or_int(err_line, err_col)

#*** main driver
# Read from the file named by the first command-line argument, or from
# standard input when no argument is given.
input_file = sys.stdin
if len(sys.argv) > 1:
    try:
        input_file = open(sys.argv[1], "r", 4096)
    except IOError:
        error(0, 0, "Can't open %s" % sys.argv[1])

try:
    # Print one line per token: line, column, token name and, for integer,
    # identifier and string tokens, the token value. Stop after End_of_input.
    while True:
        t = gettok()
        tok = t[0]
        line = t[1]
        col = t[2]

        print("%5d  %5d   %-14s" % (line, col, all_syms[tok]), end='')

        if tok == tk_Integer:
            print("   %5d" % (t[3]))
        elif tok == tk_Ident:
            print("  %s" % (t[3]))
        elif tok == tk_String:
            print('  "%s"' % (t[3]))
        else:
            print("")

        if tok == tk_EOI:
            break
finally:
    # Also runs when error() exits via SystemExit, so an opened input file
    # is always released; stdin is left alone.
    if input_file is not sys.stdin:
        input_file.close()

　　输出(测试用例三)

　　5 16 Keyword_print

　　5 40 Op_subtract

　　6 16 Keyword_putc

　　6 40 Op_less

　　7 16 Keyword_if

　　7 40 Op_greater

　　8 16 Keyword_else

　　8 40 Op_lessequal

　　9 16 Keyword_while

　　9 40 Op_greaterequal

　　10 16 LeftBrace

　　10 40 Op_equal

　　11 16 RightBrace

　　11 40 Op_notequal

　　12 16 LeftParen

　　12 40 Op_and

　　13 16 RightParen

　　13 40 Op_or

　　14 16 Op_subtract

　　14 40 Semicolon

　　15 16 Op_not

　　15 40 Comma

　　16 16 Op_multiply

　　16 40 Op_assign

　　17 16 Op_divide

　　17 40 Integer 42

　　18 16 Op_mod

　　18 40 String "String literal"

　　19 16 Op_add

　　19 40 Identifier variable_name

　　20 26 Integer 10

　　21 26 Integer 92

　　22 26 Integer 32

　　23 1 End_of_input

分享名称：用Python实现词法分析器（LexicalAnalyzer）-创新互联
当前路径：http://ybzwz.com/article/djjdhc.html

用Python实现词法分析器（LexicalAnalyzer）-创新互联

其他资讯