Coverage for lobster/common/lexer.py: 12%

76 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2026-04-16 05:31 +0000

1#!/usr/bin/env python3 

2# 

3# LOBSTER - Lightweight Open BMW Software Traceability Evidence Report 

4# Copyright (C) 2022-2023 Bayerische Motoren Werke Aktiengesellschaft (BMW AG) 

5# 

6# This program is free software: you can redistribute it and/or modify 

7# it under the terms of the GNU Affero General Public License as 

8# published by the Free Software Foundation, either version 3 of the 

9# License, or (at your option) any later version. 

10# 

11# This program is distributed in the hope that it will be useful, but 

12# WITHOUT ANY WARRANTY; without even the implied warranty of 

13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 

14# Affero General Public License for more details. 

15# 

16# You should have received a copy of the GNU Affero General Public 

17# License along with this program. If not, see 

18# <https://www.gnu.org/licenses/>. 

19 

20import sys 

21import os.path 

22 

23from lobster.common import errors 

24from lobster.common import location 

25 

26 

class Token:
    """A single lexical token: its kind, its raw source text and where
    it was found.
    """

    def __init__(self, kind, text, loc):
        # kind: token category string (e.g. "STRING")
        # text: the exact characters of the token as they appear in the input
        # loc:  location object describing where the token was read
        self.kind, self.text, self.loc = kind, text, loc

    def value(self):
        """Return the token's payload.

        For STRING tokens the first and last character (the enclosing
        quotes) are removed; every other kind is returned verbatim.
        """
        is_string = self.kind == "STRING"
        return self.text[1:-1] if is_string else self.text

    def __repr__(self):
        parts = (self.kind, self.text, self.loc)
        return "Token(%s, %s, %s)" % parts

42 

43 

class Lexer:
    """Character-by-character tokenizer for a single text file.

    The whole file is read into memory (as UTF-8) on construction.
    Successive calls to token() yield Token objects of kind C_BRA,
    C_KET, COLON, COMMA, SEMI, STRING or KEYWORD, and None once the
    input is exhausted. Whitespace and '#' line comments are skipped.

    State:
      lexpos   index of the current character in content (-1 before
               the first advance)
      line_nr  1-based line number of the current character
      cc / nc  current and next character, None when past the end
    """

    # Single-character tokens and the kind reported for each of them.
    _PUNCTUATION = {
        "{": "C_BRA",
        "}": "C_KET",
        ":": "COLON",
        ",": "COMMA",
        ";": "SEMI",
    }

    def __init__(self, mh, file_name):
        """Load file_name and position the lexer before its first character.

        mh is the errors.Message_Handler used to report lexing problems.
        """
        assert isinstance(mh, errors.Message_Handler)
        assert isinstance(file_name, str)
        assert os.path.isfile(file_name)

        self.file_name = file_name
        self.mh = mh

        with open(file_name, "r", encoding="UTF-8") as fd:
            self.content = fd.read()
        self.length = len(self.content)

        self.lexpos = -1
        self.line_nr = 1
        self.cc = None
        self.nc = self.content[0] if self.length > 0 else None

    def advance(self):
        """Move one character forward, updating cc, nc and line_nr."""
        self.lexpos += 1
        # The line counter is bumped when we step *off* a newline, so the
        # newline character itself still belongs to the line it terminates.
        if self.cc == "\n":
            self.line_nr += 1
        self.cc = self.nc
        if self.lexpos + 1 < self.length:
            self.nc = self.content[self.lexpos + 1]
        else:
            self.nc = None

    def error(self, message):
        """Report a lexing error at the current line via the message handler.

        NOTE(review): mh.lex_error presumably raises errors.LOBSTER_Error
        (sanity_test catches that type) - confirm against the errors module.
        """
        loc = location.File_Reference(filename = self.file_name,
                                      line     = self.line_nr)
        self.mh.lex_error(loc, message)

    def _skip_trivia(self):
        """Skip whitespace and '#' comments.

        Returns True with cc on the first character of the next token,
        or False if the end of input was reached first.
        """
        while True:
            while self.nc and self.nc.isspace():
                self.advance()
            if self.nc is None:
                return False
            self.advance()

            if self.cc != "#":
                return True
            # Comment: consume up to (and including) the end of the line.
            while self.cc and self.cc != "\n":
                self.advance()

    def _scan_string(self):
        """Consume a string literal; cc is the opening quote on entry and
        the closing quote on exit.
        """
        self.advance()
        while self.cc != '"':
            # Check *before* advancing so that a newline (or end of file)
            # directly after the opening quote is also diagnosed.
            if self.cc in (None, "\n"):
                self.error("unterminated string")
                # Only reached if the handler did not raise; stop here
                # instead of looping forever at end of input.
                return
            self.advance()

    def _scan_keyword(self):
        """Consume the remaining letters/underscores of a keyword; cc is
        its first letter on entry and its last character on exit.
        """
        # nc is None at end of input, so it must be tested before calling
        # str methods on it.
        while self.nc is not None and (self.nc.isalpha() or self.nc == "_"):
            self.advance()

    def token(self):
        """Return the next Token, or None when the input is exhausted."""
        if not self._skip_trivia():
            return None

        kind    = None
        t_start = self.lexpos

        if self.cc in self._PUNCTUATION:
            kind = self._PUNCTUATION[self.cc]
        elif self.cc == '"':
            kind = "STRING"
            self._scan_string()
        elif self.cc.isalpha():
            kind = "KEYWORD"
            self._scan_keyword()
        else:
            self.error("unexpected character: '%s'" % self.cc)

        t_end = self.lexpos

        return Token(
            kind = kind,
            text = self.content[t_start : t_end + 1],
            loc  = location.File_Reference(filename = self.file_name,
                                           line     = self.line_nr))

126 

127 

def sanity_test():
    """Tokenize the file named by the first command-line argument and
    print every token.

    Returns 0 when the whole file was lexed, 1 if a LOBSTER_Error was
    raised along the way.
    """
    handler = errors.Message_Handler()
    lexer   = Lexer(handler, sys.argv[1])
    try:
        current = lexer.token()
        while current is not None:
            print(current)
            current = lexer.token()
    except errors.LOBSTER_Error:
        return 1
    return 0

140 

141 

if __name__ == "__main__":
    # sanity_test() returns 0 on success and 1 on a lexing error; hand that
    # to the interpreter so the process exit status reflects the outcome.
    sys.exit(sanity_test())