Coverage for lobster/common/lexer.py: 85%

76 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2026-05-12 15:02 +0000

1#!/usr/bin/env python3 

2# 

3# LOBSTER - Lightweight Open BMW Software Traceability Evidence Report 

4# Copyright (C) 2022-2023 Bayerische Motoren Werke Aktiengesellschaft (BMW AG) 

5# 

6# This program is free software: you can redistribute it and/or modify 

7# it under the terms of the GNU Affero General Public License as 

8# published by the Free Software Foundation, either version 3 of the 

9# License, or (at your option) any later version. 

10# 

11# This program is distributed in the hope that it will be useful, but 

12# WITHOUT ANY WARRANTY; without even the implied warranty of 

13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 

14# Affero General Public License for more details. 

15# 

16# You should have received a copy of the GNU Affero General Public 

17# License along with this program. If not, see 

18# <https://www.gnu.org/licenses/>. 

19 

20import sys 

21import os.path 

22 

23from lobster.common import errors 

24from lobster.common import location 

25 

26 

class Token:
    """One lexical token: its kind, its raw source text and its location."""

    def __init__(self, kind, text, loc):
        # kind: token category name (e.g. "STRING")
        # text: the exact characters of the token as they appear in the input
        # loc:  location object attached by whoever created the token
        self.kind, self.text, self.loc = kind, text, loc

    def value(self):
        """Return the token's payload.

        For STRING tokens the first and last character of the raw text
        (the surrounding quotes) are dropped; every other kind returns
        its text unchanged.
        """
        return self.text[1:-1] if self.kind == "STRING" else self.text

    def __repr__(self):
        return "Token({}, {}, {})".format(self.kind, self.text, self.loc)

40 

41 

class Lexer:
    """Hand-written lexer with one character of lookahead.

    The constructor reads the whole file into memory; each call to
    token() then yields the next Token, or None once the input is
    exhausted.
    """

    # Single-character tokens and the kind they produce.
    _PUNCTUATION = {
        "{": "C_BRA",
        "}": "C_KET",
        ":": "COLON",
        ",": "COMMA",
        ";": "SEMI",
    }

    def __init__(self, mh, file_name):
        """Load file_name (read as UTF-8) and position before its first character.

        mh is the errors.Message_Handler that receives lexing errors;
        file_name must name an existing file.
        """
        assert isinstance(mh, errors.Message_Handler)
        assert isinstance(file_name, str)
        assert os.path.isfile(file_name)

        self.file_name = file_name
        self.mh = mh

        with open(file_name, encoding="UTF-8") as fd:
            self.content = fd.read()
        self.length = len(self.content)

        # lexpos indexes the current character; -1 means "nothing read yet".
        self.lexpos = -1
        self.line_nr = 1
        # cc is the current character, nc the lookahead; nc is None at
        # end of input (or immediately, for an empty file).
        self.cc = None
        self.nc = self.content[0] if self.length > 0 else None

    def advance(self):
        """Step one character forward, updating cc, nc and line_nr."""
        self.lexpos += 1
        # The line counter is bumped when moving *off* a newline, so the
        # newline character itself still belongs to the line it ends.
        if self.cc == "\n":
            self.line_nr += 1
        self.cc = self.nc
        if self.lexpos + 1 < self.length:
            self.nc = self.content[self.lexpos + 1]
        else:
            self.nc = None

    def error(self, message):
        """Report message through mh.lex_error at the current line.

        NOTE(review): token() relies on lex_error not returning
        (presumably it raises errors.LOBSTER_Error) - confirm.
        """
        loc = location.File_Reference(filename = self.file_name,
                                      line     = self.line_nr)
        self.mh.lex_error(loc, message)

    def token(self):
        """Return the next Token, or None at end of input.

        Whitespace and '#' comments (which run to the end of the line)
        are skipped. The kinds produced are C_BRA, C_KET, COLON, COMMA,
        SEMI, STRING and KEYWORD; an unterminated string or any other
        character is reported through error().
        """
        # Skip comments and whitespace
        while True:
            while self.nc and self.nc.isspace():
                self.advance()
            if self.nc is None:
                return None
            self.advance()

            if self.cc == "#":
                while self.cc and self.cc != "\n":
                    self.advance()
            else:
                break

        t_start = self.lexpos

        kind = self._PUNCTUATION.get(self.cc)
        if kind is None:
            if self.cc == '"':
                kind = "STRING"
                self.advance()
                # Strings may not span lines or run into end of input.
                while self.cc != '"':
                    self.advance()
                    if self.cc in (None, "\n"):
                        self.error("unterminated string")
            elif self.cc.isalpha():
                kind = "KEYWORD"
                # nc is None at end of input; test for that first so a
                # keyword that is the very last thing in the file is
                # lexed instead of crashing on None.isalpha().
                while self.nc is not None and (self.nc.isalpha() or
                                               self.nc == "_"):
                    self.advance()
            else:
                self.error(f"unexpected character: '{self.cc}'")

        t_end = self.lexpos

        return Token(
            kind = kind,
            text = self.content[t_start : t_end + 1],
            loc  = location.File_Reference(filename = self.file_name,
                                           line     = self.line_nr))

124 

125 

def sanity_test():
    """Lex the file named by sys.argv[1] and print every token.

    Returns 1 if an errors.LOBSTER_Error is raised while lexing,
    otherwise 0.
    """
    handler = errors.Message_Handler()
    lexer = Lexer(handler, sys.argv[1])
    try:
        # token() returns None at end of input, which ends the iteration.
        for tok in iter(lexer.token, None):
            print(tok)
    except errors.LOBSTER_Error:
        return 1
    return 0

138 

139 

if __name__ == "__main__":
    # sanity_test() returns 0 on success and 1 on a lexing error; hand
    # that to the interpreter so it becomes the process exit status
    # instead of being silently dropped.
    sys.exit(sanity_test())