Coverage for lobster/common/lexer.py: 12%
76 statements
« prev ^ index » next coverage.py v7.10.7, created at 2026-05-12 15:02 +0000
« prev ^ index » next coverage.py v7.10.7, created at 2026-05-12 15:02 +0000
1#!/usr/bin/env python3
2#
3# LOBSTER - Lightweight Open BMW Software Traceability Evidence Report
4# Copyright (C) 2022-2023 Bayerische Motoren Werke Aktiengesellschaft (BMW AG)
5#
6# This program is free software: you can redistribute it and/or modify
7# it under the terms of the GNU Affero General Public License as
8# published by the Free Software Foundation, either version 3 of the
9# License, or (at your option) any later version.
10#
11# This program is distributed in the hope that it will be useful, but
12# WITHOUT ANY WARRANTY; without even the implied warranty of
13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14# Affero General Public License for more details.
15#
16# You should have received a copy of the GNU Affero General Public
17# License along with this program. If not, see
18# <https://www.gnu.org/licenses/>.
20import sys
21import os.path
23from lobster.common import errors
24from lobster.common import location
class Token:
    """A single lexical token.

    Attributes:
        kind: Token category as a string, e.g. "STRING".
        text: The exact source text of the token.
        loc:  Where the token was found.
    """

    def __init__(self, kind, text, loc):
        """Store the token's kind, raw text and location unchanged."""
        self.kind, self.text, self.loc = kind, text, loc

    def value(self):
        """Return the token's payload.

        For a STRING token this is the text without its first and last
        character (the surrounding quotes); for every other kind it is
        the raw text.
        """
        return self.text[1:-1] if self.kind == "STRING" else self.text

    def __repr__(self):
        """Return a debugging representation showing kind, text and loc."""
        return f"Token({self.kind}, {self.text}, {self.loc})"
class Lexer:
    """Character-level lexer for a single UTF-8 text file.

    The constructor reads the whole file into memory.  Each call to
    token() skips whitespace and ``#`` line comments and returns the
    next Token, or None once the input is exhausted.

    Token kinds produced: C_BRA ``{``, C_KET ``}``, COLON ``:``,
    COMMA ``,``, SEMI ``;``, STRING (double-quoted, must end on the
    line it starts on) and KEYWORD (a letter followed by letters or
    underscores).
    """

    # Single-character tokens and the kind reported for each of them.
    _PUNCTUATION = {
        "{": "C_BRA",
        "}": "C_KET",
        ":": "COLON",
        ",": "COMMA",
        ";": "SEMI",
    }

    def __init__(self, mh, file_name):
        """Load *file_name* and position the lexer before its first character.

        Args:
            mh: An errors.Message_Handler; lexing errors are reported
                through its lex_error method.
            file_name: Path of an existing file, read as UTF-8.
        """
        assert isinstance(mh, errors.Message_Handler)
        assert isinstance(file_name, str)
        assert os.path.isfile(file_name)

        self.file_name = file_name
        self.mh = mh

        with open(file_name, encoding="UTF-8") as fd:
            self.content = fd.read()
        self.length = len(self.content)

        # lexpos is the index of cc (the current character) in content;
        # it is -1 until the first advance().  nc is the one-character
        # look-ahead and is None when no further input exists.
        self.lexpos = -1
        self.line_nr = 1
        self.cc = None
        self.nc = self.content[0] if self.length > 0 else None

    def advance(self):
        """Move one character forward, updating cc, nc and line_nr."""
        self.lexpos += 1
        # The line counter is bumped when we step *off* a newline, so
        # the newline itself still belongs to the line it terminates.
        if self.cc == "\n":
            self.line_nr += 1
        self.cc = self.nc
        if self.lexpos + 1 < self.length:
            self.nc = self.content[self.lexpos + 1]
        else:
            self.nc = None

    def error(self, message):
        """Report *message* at the current line via the message handler."""
        loc = location.File_Reference(filename=self.file_name,
                                      line=self.line_nr)
        self.mh.lex_error(loc, message)

    def token(self):
        """Return the next Token, or None when the input is exhausted.

        Problems (an unterminated string, an unexpected character) are
        reported through error().  NOTE(review): lex_error presumably
        raises - sanity_test() catches errors.LOBSTER_Error around this
        call - but the handler is not visible here.  Should it return,
        a Token is still produced; for an unexpected character its kind
        is None.
        """
        if not self._skip_ignored():
            return None

        t_start = self.lexpos
        kind = self._PUNCTUATION.get(self.cc)
        if kind is None:
            if self.cc == '"':
                kind = "STRING"
                self._scan_string()
            elif self.cc.isalpha():
                kind = "KEYWORD"
                self._scan_keyword()
            else:
                self.error(f"unexpected character: '{self.cc}'")

        return Token(
            kind=kind,
            text=self.content[t_start:self.lexpos + 1],
            loc=location.File_Reference(filename=self.file_name,
                                        line=self.line_nr))

    def _skip_ignored(self):
        """Skip whitespace and comments up to the start of the next token.

        Returns True with the token's first character in cc, or False
        if only whitespace and comments remained.
        """
        while True:
            while self.nc is not None and self.nc.isspace():
                self.advance()
            if self.nc is None:
                return False
            self.advance()
            if self.cc != "#":
                return True
            # A comment runs to the end of the line or of the input.
            while self.cc is not None and self.cc != "\n":
                self.advance()

    def _scan_string(self):
        """Consume a string literal; cc is the opening quote on entry.

        On success cc is the closing quote.  A newline or the end of
        the input before the closing quote is reported as an error.
        """
        self.advance()
        while self.cc != '"':
            # Check *before* advancing: otherwise a newline directly
            # after the opening quote is stepped over unnoticed and the
            # string silently continues onto the next line.
            if self.cc is None or self.cc == "\n":
                self.error("unterminated string")
                # Only reached if the handler returns; stop here so a
                # non-raising handler cannot cause an endless loop.
                return
            self.advance()

    def _scan_keyword(self):
        """Consume the rest of a keyword; cc is its first letter on entry."""
        # nc is None at the end of the input, so it must be tested
        # before calling str methods on it.
        while self.nc is not None and (self.nc.isalpha() or self.nc == "_"):
            self.advance()
def sanity_test():
    """Lex the file named by the first command-line argument.

    Every token is printed as it is produced.  Returns 1 if an
    errors.LOBSTER_Error is raised while lexing, and 0 otherwise.
    """
    handler = errors.Message_Handler()
    lex = Lexer(handler, sys.argv[1])
    try:
        # token() returns None at the end of the input, which serves as
        # the sentinel that ends the iteration.
        for tok in iter(lex.token, None):
            print(tok)
    except errors.LOBSTER_Error:
        return 1
    return 0
if __name__ == "__main__":
    # Hand sanity_test()'s 0/1 result to the shell as the exit status;
    # discarding it made the script exit with 0 even when lexing failed.
    sys.exit(sanity_test())