"""Handwritten parser of dependency specifiers.
 The docstring for each __parse_* function contains EBNF-inspired grammar representing
 the implementation.
 """
 
 from __future__ import annotations
 
 import ast
 from typing import NamedTuple, Sequence, Tuple, Union
 
 from ._tokenizer import DEFAULT_RULES, Tokenizer
 
 
class Node:
    def __init__(self, value: str) -> None:
        self.value = value

    def __str__(self) -> str:
        return self.value

    def __repr__(self) -> str:
        return f"<{self.__class__.__name__}('{self}')>"

    def serialize(self) -> str:
        raise NotImplementedError
 
 
class Variable(Node):
    def serialize(self) -> str:
        return str(self)
 
 
class Value(Node):
    def serialize(self) -> str:
        return f'"{self}"'
 
 
class Op(Node):
    def serialize(self) -> str:
        return str(self)
 
 
 MarkerVar = Union[Variable, Value]
 MarkerItem = Tuple[MarkerVar, Op, MarkerVar]
 MarkerAtom = Union[MarkerItem, Sequence["MarkerAtom"]]
 MarkerList = Sequence[Union["MarkerList", MarkerAtom, str]]
 
 
class ParsedRequirement(NamedTuple):
    name: str
    url: str
    extras: list[str]
    specifier: str
    marker: MarkerList | None
 
 
 # --------------------------------------------------------------------------------------
 # Recursive descent parser for dependency specifier
 # --------------------------------------------------------------------------------------
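# Example (illustrative only): for an input such as
#   'name[fred,bar] @ http://foo.com ; python_version=="2.7"'
# parse_requirement returns a ParsedRequirement whose name is "name", extras are
# ["fred", "bar"], url is "http://foo.com", specifier is "", and marker is the
# MarkerList parsed from the expression after the semicolon.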
def parse_requirement(source: str) -> ParsedRequirement:
    return _parse_requirement(Tokenizer(source, rules=DEFAULT_RULES))
 
 
def _parse_requirement(tokenizer: Tokenizer) -> ParsedRequirement:
    """
    requirement = WS? IDENTIFIER WS? extras WS? requirement_details
    """
    tokenizer.consume("WS")

    name_token = tokenizer.expect(
        "IDENTIFIER", expected="package name at the start of dependency specifier"
    )
    name = name_token.text
    tokenizer.consume("WS")

    extras = _parse_extras(tokenizer)
    tokenizer.consume("WS")

    url, specifier, marker = _parse_requirement_details(tokenizer)
    tokenizer.expect("END", expected="end of dependency specifier")

    return ParsedRequirement(name, url, extras, specifier, marker)
 
 
def _parse_requirement_details(
    tokenizer: Tokenizer,
) -> tuple[str, str, MarkerList | None]:
    """
    requirement_details = AT URL (WS requirement_marker?)?
                        | specifier WS? (requirement_marker)?
    """

    specifier = ""
    url = ""
    marker = None

    if tokenizer.check("AT"):
        tokenizer.read()
        tokenizer.consume("WS")

        url_start = tokenizer.position
        url = tokenizer.expect("URL", expected="URL after @").text
        if tokenizer.check("END", peek=True):
            return (url, specifier, marker)

        tokenizer.expect("WS", expected="whitespace after URL")

        # The input might end after whitespace.
        if tokenizer.check("END", peek=True):
            return (url, specifier, marker)

        marker = _parse_requirement_marker(
            tokenizer, span_start=url_start, after="URL and whitespace"
        )
    else:
        specifier_start = tokenizer.position
        specifier = _parse_specifier(tokenizer)
        tokenizer.consume("WS")

        if tokenizer.check("END", peek=True):
            return (url, specifier, marker)

        marker = _parse_requirement_marker(
            tokenizer,
            span_start=specifier_start,
            after=(
                "version specifier"
                if specifier
                else "name and no valid version specifier"
            ),
        )

    return (url, specifier, marker)
 
 
def _parse_requirement_marker(
    tokenizer: Tokenizer, *, span_start: int, after: str
) -> MarkerList:
    """
    requirement_marker = SEMICOLON marker WS?
    """

    if not tokenizer.check("SEMICOLON"):
        tokenizer.raise_syntax_error(
            f"Expected end or semicolon (after {after})",
            span_start=span_start,
        )
    tokenizer.read()

    marker = _parse_marker(tokenizer)
    tokenizer.consume("WS")

    return marker
 
 
def _parse_extras(tokenizer: Tokenizer) -> list[str]:
    """
    extras = (LEFT_BRACKET wsp* extras_list? wsp* RIGHT_BRACKET)?
    """
    if not tokenizer.check("LEFT_BRACKET", peek=True):
        return []

    with tokenizer.enclosing_tokens(
        "LEFT_BRACKET",
        "RIGHT_BRACKET",
        around="extras",
    ):
        tokenizer.consume("WS")
        extras = _parse_extras_list(tokenizer)
        tokenizer.consume("WS")

    return extras
 
 
def _parse_extras_list(tokenizer: Tokenizer) -> list[str]:
    """
    extras_list = identifier (wsp* ',' wsp* identifier)*
    """
    extras: list[str] = []

    if not tokenizer.check("IDENTIFIER"):
        return extras

    extras.append(tokenizer.read().text)

    while True:
        tokenizer.consume("WS")
        if tokenizer.check("IDENTIFIER", peek=True):
            tokenizer.raise_syntax_error("Expected comma between extra names")
        elif not tokenizer.check("COMMA"):
            break

        tokenizer.read()
        tokenizer.consume("WS")

        extra_token = tokenizer.expect("IDENTIFIER", expected="extra name after comma")
        extras.append(extra_token.text)

    return extras
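# Example (illustrative only): for an input such as 'name[quux, strange]', the
# bracketed portion parses to the list ["quux", "strange"], while an empty
# 'name[]' yields [].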
 
 
def _parse_specifier(tokenizer: Tokenizer) -> str:
    """
    specifier = LEFT_PARENTHESIS WS? version_many WS? RIGHT_PARENTHESIS
              | WS? version_many WS?
    """
    with tokenizer.enclosing_tokens(
        "LEFT_PARENTHESIS",
        "RIGHT_PARENTHESIS",
        around="version specifier",
    ):
        tokenizer.consume("WS")
        parsed_specifiers = _parse_version_many(tokenizer)
        tokenizer.consume("WS")

    return parsed_specifiers
 
 
def _parse_version_many(tokenizer: Tokenizer) -> str:
    """
    version_many = (SPECIFIER (WS? COMMA WS? SPECIFIER)*)?
    """
    parsed_specifiers = ""
    while tokenizer.check("SPECIFIER"):
        span_start = tokenizer.position
        parsed_specifiers += tokenizer.read().text
        if tokenizer.check("VERSION_PREFIX_TRAIL", peek=True):
            tokenizer.raise_syntax_error(
                ".* suffix can only be used with `==` or `!=` operators",
                span_start=span_start,
                span_end=tokenizer.position + 1,
            )
        if tokenizer.check("VERSION_LOCAL_LABEL_TRAIL", peek=True):
            tokenizer.raise_syntax_error(
                "Local version label can only be used with `==` or `!=` operators",
                span_start=span_start,
                span_end=tokenizer.position,
            )
        tokenizer.consume("WS")
        if not tokenizer.check("COMMA"):
            break
        parsed_specifiers += tokenizer.read().text
        tokenizer.consume("WS")

    return parsed_specifiers
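# Example (illustrative only): for an input such as 'name >=1.0, <2.0', the
# version_many rule concatenates the SPECIFIER and COMMA token texts, producing
# roughly ">=1.0,<2.0"; whitespace between the tokens is consumed rather than
# preserved.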
 
 
 # --------------------------------------------------------------------------------------
 # Recursive descent parser for marker expression
 # --------------------------------------------------------------------------------------
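# Example (illustrative only):
#   parse_marker('python_version < "3.11" and os_name == "posix"')
# returns a MarkerList along the lines of
#   [(Variable('python_version'), Op('<'), Value('3.11')), 'and',
#    (Variable('os_name'), Op('=='), Value('posix'))]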
def parse_marker(source: str) -> MarkerList:
    return _parse_full_marker(Tokenizer(source, rules=DEFAULT_RULES))
 
 
def _parse_full_marker(tokenizer: Tokenizer) -> MarkerList:
    retval = _parse_marker(tokenizer)
    tokenizer.expect("END", expected="end of marker expression")
    return retval
 
 
def _parse_marker(tokenizer: Tokenizer) -> MarkerList:
    """
    marker = marker_atom (BOOLOP marker_atom)+
    """
    expression = [_parse_marker_atom(tokenizer)]
    while tokenizer.check("BOOLOP"):
        token = tokenizer.read()
        expr_right = _parse_marker_atom(tokenizer)
        expression.extend((token.text, expr_right))
    return expression
 
 
def _parse_marker_atom(tokenizer: Tokenizer) -> MarkerAtom:
    """
    marker_atom = WS? LEFT_PARENTHESIS WS? marker WS? RIGHT_PARENTHESIS WS?
                | WS? marker_item WS?
    """

    tokenizer.consume("WS")
    if tokenizer.check("LEFT_PARENTHESIS", peek=True):
        with tokenizer.enclosing_tokens(
            "LEFT_PARENTHESIS",
            "RIGHT_PARENTHESIS",
            around="marker expression",
        ):
            tokenizer.consume("WS")
            marker: MarkerAtom = _parse_marker(tokenizer)
            tokenizer.consume("WS")
    else:
        marker = _parse_marker_item(tokenizer)
    tokenizer.consume("WS")
    return marker
 
 
def _parse_marker_item(tokenizer: Tokenizer) -> MarkerItem:
    """
    marker_item = WS? marker_var WS? marker_op WS? marker_var WS?
    """
    tokenizer.consume("WS")
    marker_var_left = _parse_marker_var(tokenizer)
    tokenizer.consume("WS")
    marker_op = _parse_marker_op(tokenizer)
    tokenizer.consume("WS")
    marker_var_right = _parse_marker_var(tokenizer)
    tokenizer.consume("WS")
    return (marker_var_left, marker_op, marker_var_right)
 
 
def _parse_marker_var(tokenizer: Tokenizer) -> MarkerVar:
    """
    marker_var = VARIABLE | QUOTED_STRING
    """
    if tokenizer.check("VARIABLE"):
        return process_env_var(tokenizer.read().text.replace(".", "_"))
    elif tokenizer.check("QUOTED_STRING"):
        return process_python_str(tokenizer.read().text)
    else:
        tokenizer.raise_syntax_error(
            message="Expected a marker variable or quoted string"
        )
 
 
def process_env_var(env_var: str) -> Variable:
    if env_var in ("platform_python_implementation", "python_implementation"):
        return Variable("platform_python_implementation")
    else:
        return Variable(env_var)
 
 
def process_python_str(python_str: str) -> Value:
    value = ast.literal_eval(python_str)
    return Value(str(value))
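# Example (illustrative only): process_python_str('"3.6"') evaluates the quoted
# literal with ast.literal_eval and wraps the result, yielding Value('3.6').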
 
 
def _parse_marker_op(tokenizer: Tokenizer) -> Op:
    """
    marker_op = IN | NOT IN | OP
    """
    if tokenizer.check("IN"):
        tokenizer.read()
        return Op("in")
    elif tokenizer.check("NOT"):
        tokenizer.read()
        tokenizer.expect("WS", expected="whitespace after 'not'")
        tokenizer.expect("IN", expected="'in' after 'not'")
        return Op("not in")
    elif tokenizer.check("OP"):
        return Op(tokenizer.read().text)
    else:
        return tokenizer.raise_syntax_error(
            "Expected marker operator, one of "
            "<=, <, !=, ==, >=, >, ~=, ===, in, not in"
        )
 