1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 """Tokenize DNS master file format"""
17
18 import cStringIO
19 import sys
20
21 import dns.exception
22 import dns.name
23 import dns.ttl
24
# Characters that end an unquoted token.  Each delimiter maps to True.
_DELIMITERS = dict.fromkeys((' ', '\t', '\n', ';', '(', ')', '"'), True)

# Inside a quoted string only the closing double quote ends the token.
_QUOTING_DELIMITERS = dict.fromkeys(('"',), True)

# Token type codes, numbered consecutively from 0.
(EOF,
 EOL,
 WHITESPACE,
 IDENTIFIER,
 QUOTED_STRING,
 COMMENT,
 DELIMITER) = range(7)
43
45 """An attempt was made to unget a token when the unget buffer was full."""
46
48 """A DNS master file format token.
49
50 @ivar ttype: The token type
51 @type ttype: int
52 @ivar value: The token value
53 @type value: string
54 @ivar has_escape: Does the token value contain escapes?
55 @type has_escape: bool
56 """
57
58 - def __init__(self, ttype, value='', has_escape=False):
59 """Initialize a token instance.
60
61 @param ttype: The token type
62 @type ttype: int
63 @param value: The token value
64 @type value: string
65 @param has_escape: Does the token value contain escapes?
66 @type has_escape: bool
67 """
68 self.ttype = ttype
69 self.value = value
70 self.has_escape = has_escape
71
73 return self.ttype == EOF
74
76 return self.ttype == EOL
77
80
83
86
89
92
94 return (self.ttype == EOL or self.ttype == EOF)
95
97 if not isinstance(other, Token):
98 return False
99 return (self.ttype == other.ttype and
100 self.value == other.value)
101
103 if not isinstance(other, Token):
104 return True
105 return (self.ttype != other.ttype or
106 self.value != other.value)
107
109 return '%d "%s"' % (self.ttype, self.value)
110
139
140
141
144
146 return iter((self.ttype, self.value))
147
149 if i == 0:
150 return self.ttype
151 elif i == 1:
152 return self.value
153 else:
154 raise IndexError
155
157 """A DNS master file format tokenizer.
158
159 A token is a Token object whose I{ttype} is an int and whose
160 I{value} is a string. The valid types are EOF, EOL, WHITESPACE,
161 IDENTIFIER, QUOTED_STRING, COMMENT, and DELIMITER.
162
163 @ivar file: The file to tokenize
164 @type file: file
165 @ivar ungotten_char: The most recently ungotten character, or None.
166 @type ungotten_char: string
167 @ivar ungotten_token: The most recently ungotten token, or None.
168 @type ungotten_token: Token object
169 @ivar multiline: The current multiline level. This value is increased
170 by one every time a '(' delimiter is read, and decreased by one every time
171 a ')' delimiter is read.
172 @type multiline: int
173 @ivar quoting: This variable is true if the tokenizer is currently
174 reading a quoted string.
175 @type quoting: bool
176 @ivar eof: This variable is true if the tokenizer has encountered EOF.
177 @type eof: bool
178 @ivar delimiters: The current delimiter dictionary.
179 @type delimiters: dict
180 @ivar line_number: The current line number
181 @type line_number: int
182 @ivar filename: A filename that will be returned by the L{where} method.
183 @type filename: string
184 """
185
def __init__(self, f=sys.stdin, filename=None):
    """Set up a tokenizer that reads from a file or from a string.

    @param f: The file to tokenize.  The default is sys.stdin (the
    object bound to sys.stdin when this method was defined).
    This parameter may also be a string, in which case the tokenizer
    will take its input from the contents of the string.
    @type f: file or string
    @param filename: the name that the L{where} method will report for
    the input.  If None, '<string>', '<stdin>' or '<file>' is chosen
    according to the kind of input.
    @type filename: string
    """

    # Work out the fallback name for this kind of input, wrapping string
    # input in a file-like object along the way.
    if isinstance(f, (str, unicode)):
        fallback_name = '<string>'
        f = cStringIO.StringIO(f)
    elif f is sys.stdin:
        fallback_name = '<stdin>'
    else:
        fallback_name = '<file>'
    if filename is None:
        filename = fallback_name

    self.file = f
    self.filename = filename
    self.line_number = 1
    # Nothing has been pushed back yet.
    self.ungotten_char = None
    self.ungotten_token = None
    # Scanner state: not inside parentheses, not inside quotes, not at EOF.
    self.multiline = 0
    self.quoting = False
    self.eof = False
    self.delimiters = _DELIMITERS
217
219 """Read a character from input.
220 @rtype: string
221 """
222
223 if self.ungotten_char is None:
224 if self.eof:
225 c = ''
226 else:
227 c = self.file.read(1)
228 if c == '':
229 self.eof = True
230 elif c == '\n':
231 self.line_number += 1
232 else:
233 c = self.ungotten_char
234 self.ungotten_char = None
235 return c
236
238 """Return the current location in the input.
239
240 @rtype: (string, int) tuple. The first item is the filename of
241 the input, the second is the current line number.
242 """
243
244 return (self.filename, self.line_number)
245
247 """Unget a character.
248
249 The unget buffer for characters is only one character large; it is
250 an error to try to unget a character when the unget buffer is not
251 empty.
252
253 @param c: the character to unget
254 @type c: string
255 @raises UngetBufferFull: there is already an ungotten char
256 """
257
258 if not self.ungotten_char is None:
259 raise UngetBufferFull
260 self.ungotten_char = c
261
263 """Consume input until a non-whitespace character is encountered.
264
265 The non-whitespace character is then ungotten, and the number of
266 whitespace characters consumed is returned.
267
268 If the tokenizer is in multiline mode, then newlines are whitespace.
269
270 @rtype: int
271 """
272
273 skipped = 0
274 while True:
275 c = self._get_char()
276 if c != ' ' and c != '\t':
277 if (c != '\n') or not self.multiline:
278 self._unget_char(c)
279 return skipped
280 skipped += 1
281
def get(self, want_leading=False, want_comment=False):
    """Get the next token.

    If a token is waiting in self.ungotten_token it is consumed first.
    It is returned unless it is a WHITESPACE or COMMENT token the caller
    did not ask for, in which case it is dropped and a new token is read
    from the input.

    @param want_leading: If True, return a WHITESPACE token if the
    first character read is whitespace.  The default is False.
    @type want_leading: bool
    @param want_comment: If True, return a COMMENT token if the
    first token read is a comment.  The default is False.
    @type want_comment: bool
    @rtype: Token object
    @raises dns.exception.UnexpectedEnd: input ended prematurely
    @raises dns.exception.SyntaxError: input was badly formed
    """

    if self.ungotten_token is not None:
        token = self.ungotten_token
        self.ungotten_token = None
        if token.is_whitespace():
            if want_leading:
                return token
        elif token.is_comment():
            if want_comment:
                return token
        else:
            return token
    skipped = self.skip_whitespace()
    if want_leading and skipped > 0:
        return Token(WHITESPACE, ' ')
    token = ''
    ttype = IDENTIFIER
    has_escape = False
    while True:
        c = self._get_char()
        if c == '' or c in self.delimiters:
            if c == '' and self.quoting:
                # End of input before the closing quote.
                raise dns.exception.UnexpectedEnd
            if token == '' and ttype != QUOTED_STRING:
                # Nothing collected yet: the delimiter itself decides
                # what happens next.
                if c == '(':
                    self.multiline += 1
                    self.skip_whitespace()
                    continue
                elif c == ')':
                    if self.multiline <= 0:
                        raise dns.exception.SyntaxError
                    self.multiline -= 1
                    self.skip_whitespace()
                    continue
                elif c == '"':
                    if not self.quoting:
                        # Opening quote: from now on only '"' delimits.
                        self.quoting = True
                        self.delimiters = _QUOTING_DELIMITERS
                        ttype = QUOTED_STRING
                        continue
                    else:
                        # Closing quote left over from the previous
                        # quoted string: restore normal scanning.
                        self.quoting = False
                        self.delimiters = _DELIMITERS
                        self.skip_whitespace()
                        continue
                elif c == '\n':
                    return Token(EOL, '\n')
                elif c == ';':
                    # Collect the comment text up to end of line/input.
                    while True:
                        c = self._get_char()
                        if c == '\n' or c == '':
                            break
                        token += c
                    if want_comment:
                        # Push the terminator back so the following
                        # call reports the EOL/EOF itself.
                        self._unget_char(c)
                        return Token(COMMENT, token)
                    elif c == '':
                        if self.multiline:
                            raise dns.exception.SyntaxError('unbalanced parentheses')
                        return Token(EOF)
                    elif self.multiline:
                        # Inside parentheses the newline ending the
                        # comment is just whitespace; discard the
                        # comment text and keep scanning.
                        self.skip_whitespace()
                        token = ''
                        continue
                    else:
                        return Token(EOL, '\n')
                else:
                    # End of input (c == '') or any other delimiter.
                    # For end of input the token stays empty and the
                    # check after the loop turns it into EOF.
                    token = c
                    ttype = DELIMITER
            else:
                # The delimiter ends the token collected so far; leave
                # it for the next call.
                self._unget_char(c)
            break
        elif self.quoting:
            if c == '\\':
                # Escape inside a quoted string: \DDD is replaced by
                # the character with that decimal code; any other
                # escaped character is taken literally.
                c = self._get_char()
                if c == '':
                    raise dns.exception.UnexpectedEnd
                if c.isdigit():
                    c2 = self._get_char()
                    if c2 == '':
                        raise dns.exception.UnexpectedEnd
                    c3 = self._get_char()
                    # Test c3 here: c is already known to be a digit,
                    # so checking it could never detect end of input.
                    if c3 == '':
                        raise dns.exception.UnexpectedEnd
                    if not (c2.isdigit() and c3.isdigit()):
                        raise dns.exception.SyntaxError
                    c = chr(int(c) * 100 + int(c2) * 10 + int(c3))
            elif c == '\n':
                raise dns.exception.SyntaxError('newline in quoted string')
        elif c == '\\':
            # Escape outside quotes: keep the backslash and the next
            # character in the token and flag the token as escaped.
            token += c
            has_escape = True
            c = self._get_char()
            if c == '' or c == '\n':
                raise dns.exception.UnexpectedEnd
        token += c
    if token == '' and ttype != QUOTED_STRING:
        # An empty, unquoted token means end of input was reached.
        if self.multiline:
            raise dns.exception.SyntaxError('unbalanced parentheses')
        ttype = EOF
    return Token(ttype, token, has_escape)
403
405 """Unget a token.
406
407 The unget buffer for tokens is only one token large; it is
408 an error to try to unget a token when the unget buffer is not
409 empty.
410
411 @param token: the token to unget
412 @type token: Token object
413 @raises UngetBufferFull: there is already an ungotten token
414 """
415
416 if not self.ungotten_token is None:
417 raise UngetBufferFull
418 self.ungotten_token = token
419
421 """Return the next item in an iteration.
422 @rtype: Token object
423 """
424
425 token = self.get()
426 if token.is_eof():
427 raise StopIteration
428 return token
429
432
433
434
448
450 """Read the next token and interpret it as an 8-bit unsigned
451 integer.
452
453 @raises dns.exception.SyntaxError:
454 @rtype: int
455 """
456
457 value = self.get_int()
458 if value < 0 or value > 255:
459 raise dns.exception.SyntaxError('%d is not an unsigned 8-bit integer' % value)
460 return value
461
463 """Read the next token and interpret it as a 16-bit unsigned
464 integer.
465
466 @raises dns.exception.SyntaxError:
467 @rtype: int
468 """
469
470 value = self.get_int()
471 if value < 0 or value > 65535:
472 raise dns.exception.SyntaxError('%d is not an unsigned 16-bit integer' % value)
473 return value
474
492
504
506 """Read the next token and raise an exception if it is not an identifier.
507
508 @raises dns.exception.SyntaxError:
509 @rtype: string
510 """
511
512 token = self.get().unescape()
513 if not token.is_identifier():
514 raise dns.exception.SyntaxError('expecting an identifier')
515 return token.value
516
527
529 """Read the next token and raise an exception if it isn't EOL or
530 EOF.
531
532 @raises dns.exception.SyntaxError:
533 @rtype: string
534 """
535
536 token = self.get()
537 if not token.is_eol_or_eof():
538 raise dns.exception.SyntaxError('expected EOL or EOF, got %d "%s"' % (token.ttype, token.value))
539 return token.value
540
546