"""
 yaml.py
 
 Lexer for YAML, a human-friendly data serialization language
 (http://yaml.org/).
 
 Written by Kirill Simonov <xi@resolvent.net>.
 
 License: Whatever suitable for inclusion into the Pygments package.
 """
 
from pygments.lexer import \
    ExtendedRegexLexer, LexerContext, include, bygroups
from pygments.token import \
    Text, Comment, Punctuation, Name, Literal

__all__ = ['YAMLLexer']
 
 
class YAMLLexerContext(LexerContext):
    """Indentation context for the YAML lexer."""

    def __init__(self, *args, **kwds):
        super(YAMLLexerContext, self).__init__(*args, **kwds)
        # stack of the enclosing indentation levels
        self.indent_stack = []
        # indentation of the current block context (-1 before the first node)
        self.indent = -1
        # possible indentation level for the next block context
        self.next_indent = 0
        # indentation of the current block scalar (None until it is known)
        self.block_scalar_indent = None
 
 
def something(TokenClass):
    """Do not produce empty tokens."""
    def callback(lexer, match, context):
        text = match.group()
        if not text:
            return
        yield match.start(), TokenClass, text
        context.pos = match.end()
    return callback
 
def reset_indent(TokenClass):
    """Reset the indentation levels."""
    def callback(lexer, match, context):
        text = match.group()
        context.indent_stack = []
        context.indent = -1
        context.next_indent = 0
        context.block_scalar_indent = None
        yield match.start(), TokenClass, text
        context.pos = match.end()
    return callback
 
def save_indent(TokenClass, start=False):
    """Save a possible indentation level."""
    def callback(lexer, match, context):
        text = match.group()
        extra = ''
        if start:
            context.next_indent = len(text)
            if context.next_indent < context.indent:
                while context.next_indent < context.indent:
                    context.indent = context.indent_stack.pop()
            if context.next_indent > context.indent:
                extra = text[context.indent:]
                text = text[:context.indent]
        else:
            context.next_indent += len(text)
        if text:
            yield match.start(), TokenClass, text
        if extra:
            yield match.start()+len(text), TokenClass.Error, extra
        context.pos = match.end()
    return callback
 
def set_indent(TokenClass, implicit=False):
    """Set the previously saved indentation level."""
    def callback(lexer, match, context):
        text = match.group()
        if context.indent < context.next_indent:
            context.indent_stack.append(context.indent)
            context.indent = context.next_indent
        if not implicit:
            context.next_indent += len(text)
        yield match.start(), TokenClass, text
        context.pos = match.end()
    return callback
 
def set_block_scalar_indent(TokenClass):
    """Set an explicit indentation level for a block scalar."""
    def callback(lexer, match, context):
        text = match.group()
        context.block_scalar_indent = None
        if not text:
            return
        increment = match.group(1)
        if increment:
            current_indent = max(context.indent, 0)
            increment = int(increment)
            context.block_scalar_indent = current_indent + increment
        if text:
            yield match.start(), TokenClass, text
            context.pos = match.end()
    return callback
 
def parse_block_scalar_empty_line(IndentTokenClass, ContentTokenClass):
    """Process an empty line in a block scalar."""
    def callback(lexer, match, context):
        text = match.group()
        if (context.block_scalar_indent is None or
                len(text) <= context.block_scalar_indent):
            if text:
                yield match.start(), IndentTokenClass, text
        else:
            indentation = text[:context.block_scalar_indent]
            content = text[context.block_scalar_indent:]
            yield match.start(), IndentTokenClass, indentation
            yield (match.start()+context.block_scalar_indent,
                   ContentTokenClass, content)
        context.pos = match.end()
    return callback
 
def parse_block_scalar_indent(TokenClass):
    """Process indentation spaces in a block scalar."""
    def callback(lexer, match, context):
        text = match.group()
        if context.block_scalar_indent is None:
            if len(text) <= max(context.indent, 0):
                context.stack.pop()
                context.stack.pop()
                return
            context.block_scalar_indent = len(text)
        else:
            if len(text) < context.block_scalar_indent:
                context.stack.pop()
                context.stack.pop()
                return
        if text:
            yield match.start(), TokenClass, text
            context.pos = match.end()
    return callback
 
def parse_plain_scalar_indent(TokenClass):
    """Process indentation spaces in a plain scalar."""
    def callback(lexer, match, context):
        text = match.group()
        if len(text) <= context.indent:
            context.stack.pop()
            context.stack.pop()
            return
        if text:
            yield match.start(), TokenClass, text
            context.pos = match.end()
    return callback
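
# A note on the helpers above: ExtendedRegexLexer callbacks receive
# (lexer, match, context), yield (index, tokentype, value) tuples and must
# advance context.pos themselves; they may also pop context.stack to leave a
# state early.  These helpers use that hook to track YAML indentation on the
# shared YAMLLexerContext, which is how block collections, block scalars and
# plain scalars detect where they end.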
 
 
class YAMLLexer(ExtendedRegexLexer):
    """Lexer for the YAML language."""

    name = 'YAML'
    aliases = ['yaml']
    filenames = ['*.yaml', '*.yml']
    mimetypes = ['text/x-yaml']

    tokens = {

        # the root rules
        'root': [
            # ignored whitespaces
            (r'[ ]+(?=#|$)', Text.Blank),
            # line breaks
            (r'\n+', Text.Break),
            # a comment
            (r'#[^\n]*', Comment.Single),
            # the '%YAML' directive
            (r'^%YAML(?=[ ]|$)', reset_indent(Name.Directive),
             'yaml-directive'),
            # the '%TAG' directive
            (r'^%TAG(?=[ ]|$)', reset_indent(Name.Directive),
             'tag-directive'),
            # document start and document end indicators
            (r'^(?:---|\.\.\.)(?=[ ]|$)',
             reset_indent(Punctuation.Document), 'block-line'),
            # indentation spaces
            (r'[ ]*(?![ \t\n\r\f\v]|$)',
             save_indent(Text.Indent, start=True),
             ('block-line', 'indentation')),
        ],
 
        # trailing whitespaces after directives or a block scalar indicator
        'ignored-line': [
            # ignored whitespaces
            (r'[ ]+(?=#|$)', Text.Blank),
            # a comment
            (r'#[^\n]*', Comment.Single),
            # line break
            (r'\n', Text.Break, '#pop:2'),
        ],

        # the '%YAML' directive
        'yaml-directive': [
            # the version number
            (r'([ ]+)([0-9]+\.[0-9]+)',
             bygroups(Text.Blank, Literal.Version), 'ignored-line'),
        ],

        # the '%TAG' directive
        'tag-directive': [
            # a tag handle and the corresponding prefix
            (r'([ ]+)(!|![0-9A-Za-z_-]*!)'
             r'([ ]+)(!|!?[0-9A-Za-z;/?:@&=+$,_.!~*\'()\[\]%-]+)',
             bygroups(Text.Blank, Name.Type, Text.Blank, Name.Type),
             'ignored-line'),
        ],
 
        # block scalar indicators and indentation spaces
        'indentation': [
            # trailing whitespaces are ignored
            (r'[ ]*$', something(Text.Blank), '#pop:2'),
            # whitespaces preceding block collection indicators
            (r'[ ]+(?=[?:-](?:[ ]|$))', save_indent(Text.Indent)),
            # block collection indicators
            (r'[?:-](?=[ ]|$)', set_indent(Punctuation.Indicator)),
            # the beginning of a block line
            (r'[ ]*', save_indent(Text.Indent), '#pop'),
        ],

        # an indented line in the block context
        'block-line': [
            # the line end
            (r'[ ]*(?=#|$)', something(Text.Blank), '#pop'),
            # whitespaces separating tokens
            (r'[ ]+', Text.Blank),
            # tags, anchors and aliases
            include('descriptors'),
            # block collections and scalars
            include('block-nodes'),
            # flow collections and quoted scalars
            include('flow-nodes'),
            # a plain scalar
            (r'(?=[^ \t\n\r\f\v?:,\[\]{}#&*!|>\'"%@`-]|[?:-][^ \t\n\r\f\v])',
             something(Literal.Scalar.Plain),
             'plain-scalar-in-block-context'),
        ],
 
        # tags, anchors, aliases
        'descriptors': [
            # a full-form tag
            (r'!<[0-9A-Za-z;/?:@&=+$,_.!~*\'()\[\]%-]+>', Name.Type),
            # a tag in the form '!', '!suffix' or '!handle!suffix'
            (r'!(?:[0-9A-Za-z_-]+)?'
             r'(?:![0-9A-Za-z;/?:@&=+$,_.!~*\'()\[\]%-]+)?', Name.Type),
            # an anchor
            (r'&[0-9A-Za-z_-]+', Name.Anchor),
            # an alias
            (r'\*[0-9A-Za-z_-]+', Name.Alias),
        ],

        # block collections and scalars
        'block-nodes': [
            # implicit key
            (r':(?=[ ]|$)', set_indent(Punctuation.Indicator, implicit=True)),
            # literal and folded scalars
            (r'[|>]', Punctuation.Indicator,
             ('block-scalar-content', 'block-scalar-header')),
        ],

        # flow collections and quoted scalars
        'flow-nodes': [
            # a flow sequence
            (r'\[', Punctuation.Indicator, 'flow-sequence'),
            # a flow mapping
            (r'\{', Punctuation.Indicator, 'flow-mapping'),
            # a single-quoted scalar
            (r'\'', Literal.Scalar.Flow.Quote, 'single-quoted-scalar'),
            # a double-quoted scalar
            (r'\"', Literal.Scalar.Flow.Quote, 'double-quoted-scalar'),
        ],
 
        # the content of a flow collection
        'flow-collection': [
            # whitespaces
            (r'[ ]+', Text.Blank),
            # line breaks
            (r'\n+', Text.Break),
            # a comment
            (r'#[^\n]*', Comment.Single),
            # simple indicators
            (r'[?:,]', Punctuation.Indicator),
            # tags, anchors and aliases
            include('descriptors'),
            # nested collections and quoted scalars
            include('flow-nodes'),
            # a plain scalar
            (r'(?=[^ \t\n\r\f\v?:,\[\]{}#&*!|>\'"%@`])',
             something(Literal.Scalar.Plain),
             'plain-scalar-in-flow-context'),
        ],

        # a flow sequence indicated by '[' and ']'
        'flow-sequence': [
            # include flow collection rules
            include('flow-collection'),
            # the closing indicator
            (r'\]', Punctuation.Indicator, '#pop'),
        ],

        # a flow mapping indicated by '{' and '}'
        'flow-mapping': [
            # include flow collection rules
            include('flow-collection'),
            # the closing indicator
            (r'\}', Punctuation.Indicator, '#pop'),
        ],

        # block scalar lines
        'block-scalar-content': [
            # line break
            (r'\n', Text.Break),
            # empty line
            (r'^[ ]+$',
             parse_block_scalar_empty_line(Text.Indent,
                                           Literal.Scalar.Block)),
            # indentation spaces (we may leave the state here)
            (r'^[ ]*', parse_block_scalar_indent(Text.Indent)),
            # line content
            (r'[^\n\r\f\v]+', Literal.Scalar.Block),
        ],
 
        # the header of a literal or folded scalar
        'block-scalar-header': [
            # indentation indicator followed by chomping flag
            (r'([1-9])?[+-]?(?=[ ]|$)',
             set_block_scalar_indent(Punctuation.Indicator),
             'ignored-line'),
            # chomping flag followed by indentation indicator
            (r'[+-]?([1-9])?(?=[ ]|$)',
             set_block_scalar_indent(Punctuation.Indicator),
             'ignored-line'),
        ],
 
        # ignored and regular whitespaces in quoted scalars
        'quoted-scalar-whitespaces': [
            # leading and trailing whitespaces are ignored
            (r'^[ ]+|[ ]+$', Text.Blank),
            # line breaks are ignored
            (r'\n+', Text.Break),
            # other whitespaces are a part of the value
            (r'[ ]+', Literal.Scalar.Flow),
        ],

        # single-quoted scalars
        'single-quoted-scalar': [
            # include whitespace and line break rules
            include('quoted-scalar-whitespaces'),
            # escaping of the quote character
            (r'\'\'', Literal.Scalar.Flow.Escape),
            # regular non-whitespace characters
            (r'[^ \t\n\r\f\v\']+', Literal.Scalar.Flow),
            # the closing quote
            (r'\'', Literal.Scalar.Flow.Quote, '#pop'),
        ],

        # double-quoted scalars
        'double-quoted-scalar': [
            # include whitespace and line break rules
            include('quoted-scalar-whitespaces'),
            # escaping of special characters
            (r'\\[0abt\tn\nvfre "\\N_LP]', Literal.Scalar.Flow.Escape),
            # escape codes
            (r'\\(?:x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})',
             Literal.Scalar.Flow.Escape),
            # regular non-whitespace characters
            (r'[^ \t\n\r\f\v\"\\]+', Literal.Scalar.Flow),
            # the closing quote
            (r'"', Literal.Scalar.Flow.Quote, '#pop'),
        ],

        # the beginning of a new line while scanning a plain scalar
        'plain-scalar-in-block-context-new-line': [
            # empty lines
            (r'^[ ]+$', Text.Blank),
            # line breaks
            (r'\n+', Text.Break),
            # document start and document end indicators
            (r'^(?=---|\.\.\.)', something(Punctuation.Document), '#pop:3'),
            # indentation spaces (we may leave the block line state here)
            (r'^[ ]*', parse_plain_scalar_indent(Text.Indent), '#pop'),
        ],
 
        # a plain scalar in the block context
        'plain-scalar-in-block-context': [
            # the scalar ends with the ':' indicator
            (r'[ ]*(?=:[ ]|:$)', something(Text.Blank), '#pop'),
            # the scalar ends with whitespaces followed by a comment
            (r'[ ]+(?=#)', Text.Blank, '#pop'),
            # trailing whitespaces are ignored
            (r'[ ]+$', Text.Blank),
            # line breaks are ignored
            (r'\n+', Text.Break, 'plain-scalar-in-block-context-new-line'),
            # other whitespaces are a part of the value
            (r'[ ]+', Literal.Scalar.Plain),
            # regular non-whitespace characters
            (r'(?::(?![ \t\n\r\f\v])|[^ \t\n\r\f\v:])+',
             Literal.Scalar.Plain),
        ],

        # a plain scalar in the flow context
        'plain-scalar-in-flow-context': [
            # the scalar ends with an indicator character
            (r'[ ]*(?=[,:?\[\]{}])', something(Text.Blank), '#pop'),
            # the scalar ends with a comment
            (r'[ ]+(?=#)', Text.Blank, '#pop'),
            # leading and trailing whitespaces are ignored
            (r'^[ ]+|[ ]+$', Text.Blank),
            # line breaks are ignored
            (r'\n+', Text.Break),
            # other whitespaces are a part of the value
            (r'[ ]+', Literal.Scalar.Plain),
            # regular non-whitespace characters
            (r'[^ \t\n\r\f\v,:?\[\]{}]+', Literal.Scalar.Plain),
        ],

    }
 
    def get_tokens_unprocessed(self, text=None, context=None):
        if context is None:
            context = YAMLLexerContext(text, 0)
        return super(YAMLLexer, self).get_tokens_unprocessed(text, context)
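
# Minimal usage sketch: running this module directly tokenizes a small,
# made-up YAML snippet with the lexer above and prints the resulting
# (tokentype, value) pairs.  Lexer.get_tokens() is the generic Pygments
# entry point for iterating over tokens.
if __name__ == '__main__':
    sample = (
        "%YAML 1.1\n"
        "---\n"
        "name: example\n"
        "items:\n"
        "  - one\n"
        "  - two\n"
    )
    for tokentype, value in YAMLLexer().get_tokens(sample):
        print('%-45s %r' % (tokentype, value))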
 
 
 