class Rack::Multipart::Parser

Constants

BOUNDARY_REGEX
BOUNDARY_START_LIMIT
BUFFERED_UPLOAD_BYTESIZE_LIMIT
BUFSIZE
CHARSET
EMPTY
MIME_HEADER_BYTESIZE_LIMIT
MultipartInfo
TEMPFILE_FACTORY
TEXT_PLAIN

Attributes

state[R]

Public Class Methods

new(boundary, tempfile, bufsize, query_parser) click to toggle source
# File lib/rack/multipart/parser.rb, line 201
# Sets up a parser for a single multipart body.
#
# boundary     - the boundary token, without the leading "--".
# tempfile     - handed straight to Collector.new.
# bufsize      - kept as @bufsize; handle_fast_forward compares the unread
#                buffer size against it.
# query_parser - provides the params container (make_params) and is kept
#                for later use by #result.
def initialize(boundary, tempfile, bufsize, query_parser)
  # Collaborators supplied by the caller.
  @query_parser = query_parser
  @params = query_parser.make_params
  @bufsize = bufsize

  # Delimiter strings: a part starts at "--<boundary>", the body ends at
  # "--<boundary>--".
  @boundary = "--#{boundary}"
  @full_boundary = @boundary
  @end_boundary = @boundary + '--'

  # Mutable parsing state. The parser starts out skipping ahead to the
  # first boundary.
  @state = :FAST_FORWARD
  @mime_index = 0
  @body_retained = nil
  @retained_size = 0
  @collector = Collector.new(tempfile)
  @sbuf = StringScanner.new("".dup)

  # Patterns for the header block of a part and for the boundary that
  # terminates a part body, plus the sizes derived from the boundary.
  @head_regex = /(.*?#{EOL})#{EOL}/m
  @body_regex = /(?:#{EOL})?#{Regexp.quote(@boundary)}(?:#{EOL}|--)/m
  @end_boundary_size = boundary.bytesize + 6 # (-- at start, -- at finish, EOL at end)
  @rx_max_size = EOL.size + @boundary.bytesize + [EOL.size, '--'.size].max
end
parse(io, content_length, content_type, tmpfile, bufsize, qp) click to toggle source
# File lib/rack/multipart/parser.rb, line 87
# Reads a multipart body from +io+ in +bufsize+ chunks and returns the
# parser's result.
#
# Returns EMPTY without reading anything when +content_length+ is 0 or when
# parse_boundary finds no boundary in +content_type+. When +content_length+
# is given, +io+ is wrapped in a BoundedIO built from that length.
# +tmpfile+, +bufsize+ and +qp+ are forwarded to the parser constructor.
def self.parse(io, content_length, content_type, tmpfile, bufsize, qp)
  return EMPTY if content_length == 0

  boundary = parse_boundary(content_type)
  return EMPTY unless boundary

  io = BoundedIO.new(io, content_length) if content_length
  read_buffer = String.new
  parser = new(boundary, tmpfile, bufsize, qp)

  # Always feed at least one chunk, then keep feeding until the parser
  # reports that it is done.
  loop do
    parser.on_read(io.read(bufsize, read_buffer))
    break if parser.state == :DONE
  end

  io.rewind
  parser.result
end
parse_boundary(content_type) click to toggle source
# File lib/rack/multipart/parser.rb, line 80
# Returns the first capture of MULTIPART matched against +content_type+,
# or nil when +content_type+ is missing or does not match.
def self.parse_boundary(content_type)
  return unless content_type

  matched = content_type.match(MULTIPART)
  matched[1] if matched
end

Public Instance Methods

on_read(content) click to toggle source
# File lib/rack/multipart/parser.rb, line 222
# Accepts the next chunk of the body: rejects a nil/empty chunk via
# handle_empty_content!, appends the chunk to the scan buffer, and runs the
# state machine over what is buffered.
def on_read(content)
  handle_empty_content!(content)
  @sbuf << content
  run_parser
end
result() click to toggle source
# File lib/rack/multipart/parser.rb, line 228
# Builds the final MultipartInfo from the collected parts.
#
# For each part, the data yielded by get_data is passed to
# tag_multipart_encoding and then stored into @params through the query
# parser. The MultipartInfo is built from the params hash and the bodies of
# the parts that report file?.
def result
  @collector.each do |part|
    part.get_data do |data|
      tag_multipart_encoding(part.filename, part.content_type, part.name, data)
      @query_parser.normalize_params(@params, part.name, data, @query_parser.param_depth_limit)
    end
  end

  params_hash = @params.to_params_hash
  file_bodies = @collector.find_all(&:file?).map(&:body)
  MultipartInfo.new(params_hash, file_bodies)
end

Private Instance Methods

consume_boundary() click to toggle source

Scan until we find the start or end of the boundary. If we find it, return the appropriate symbol for the start or end of the boundary. If we don't find the start or end of the boundary, clear the buffer and return nil.

# File lib/rack/multipart/parser.rb, line 358
# Advances through the buffer one BOUNDARY_REGEX match at a time.
#
# Returns :BOUNDARY when the stripped match equals the part boundary,
# :END_BOUNDARY when it equals the closing boundary, and nil once the
# scanner reaches the end of the buffer (or nothing matches) without
# finding either.
def consume_boundary
  while (chunk = @sbuf.scan_until(BOUNDARY_REGEX))
    candidate = chunk.strip
    return :BOUNDARY if full_boundary == candidate
    return :END_BOUNDARY if @end_boundary == candidate
    return if @sbuf.eos?
  end
end
full_boundary() click to toggle source
# File lib/rack/multipart/parser.rb, line 345
# Reader for the part boundary string held in @full_boundary.
def full_boundary
  @full_boundary
end
get_filename(head) click to toggle source
# File lib/rack/multipart/parser.rb, line 368
# Extracts the file name from a MIME part header block.
#
# head - the raw header text of one part.
#
# When +head+ matches RFC2183, the name is taken from the `filename`
# parameter (surrounding double quotes removed) or, failing that, from the
# extended `filename*` parameter, whose value is split into
# charset'language'value. When +head+ only matches BROKEN, the name is that
# pattern's first capture (surrounding double quotes removed).
#
# Returns the cleaned-up file name, or nil when no name is present.
def get_filename(head)
  filename = nil
  encoding = nil

  case head
  when RFC2183
    params = Hash[*head.scan(DISPPARM).flat_map(&:compact)]

    if filename = params['filename']
      filename = $1 if filename =~ /^"(.*)"$/
    elsif filename = params['filename*']
      encoding, _, filename = filename.split("'", 3)
    end
  when BROKEN
    filename = $1
    filename = $1 if filename =~ /^"(.*)"$/
  end

  return unless filename

  # Only percent-decode when every "%" starts a well-formed %XX escape, so
  # a literal percent sign in a name is left alone.
  if filename.scan(/%.?.?/).all? { |s| /%[0-9a-fA-F]{2}/.match?(s) }
    filename = Utils.unescape_path(filename)
  end

  filename.scrub!

  # Drop backslash escapes unless the name contains a backslash followed by
  # something other than a backslash or a double quote.
  if filename !~ /\\[^\\"]/
    filename = filename.gsub(/\\(.)/, '\1')
  end

  if encoding
    # The charset label comes from the client. Encoding.find raises
    # ArgumentError for unknown or empty names, so fall back to binary
    # rather than letting a bad label abort the whole parse.
    charset = begin
      ::Encoding.find(encoding)
    rescue ArgumentError
      ::Encoding::BINARY
    end
    filename.force_encoding(charset)
  end

  filename
end
handle_consume_token() click to toggle source
# File lib/rack/multipart/parser.rb, line 277
# Consumes the boundary line that follows a part body and picks the next
# state.
#
# The parse is :DONE when the closing boundary was found, or when the buffer
# is exhausted without a part boundary having been found; otherwise the next
# part's header follows (:MIME_HEAD). Returns the new state.
def handle_consume_token
  token = consume_boundary
  # Running out of buffer ends the parse, unless what we just consumed was
  # the boundary that opens another part.
  finished = token == :END_BOUNDARY || (@sbuf.eos? && token != :BOUNDARY)
  @state = finished ? :DONE : :MIME_HEAD
end
handle_empty_content!(content) click to toggle source
# File lib/rack/multipart/parser.rb, line 433
# Raises EOFError when +content+ is nil or empty; otherwise does nothing and
# returns nil.
def handle_empty_content!(content)
  raise EOFError if content.nil? || content.empty?
end
handle_fast_forward() click to toggle source
# File lib/rack/multipart/parser.rb, line 257
# Skips ahead to the first boundary of the body.
#
# Returns :DONE when the buffer holds nothing but a closing boundary,
# :MIME_HEAD when any other boundary was found (both also stored in @state),
# and :want_read when no boundary has been seen yet and more input is needed.
#
# Raises EOFError when no boundary is found and the buffered data has grown
# past the limits checked below.
def handle_fast_forward
  token = consume_boundary

  if token
    # stop parsing a buffer if a buffer is only an end boundary.
    lone_end_boundary = token == :END_BOUNDARY && @sbuf.pos == @end_boundary_size && @sbuf.eos?
    @state = lone_end_boundary ? :DONE : :MIME_HEAD
    return @state
  end

  raise EOFError, "bad content body" if @sbuf.rest_size >= @bufsize

  # We raise if we don't find the multipart boundary, to avoid unbounded memory
  # buffering. Note that the actual limit is the higher of 16KB and the buffer size (1MB by default)
  raise EOFError, "multipart boundary not found within limit" if @sbuf.string.bytesize > BOUNDARY_START_LIMIT

  # no boundary found, keep reading data
  :want_read
end
handle_mime_body() click to toggle source
# File lib/rack/multipart/parser.rb, line 323
# Consumes the body of the current part from the scan buffer.
#
# When the terminating boundary is already buffered, the body in front of it
# is handed to the collector, the scanner is advanced past the body and the
# two bytes that follow it, and the state moves to :CONSUME_TOKEN.
#
# Otherwise everything except the last @rx_max_size bytes (which might be
# the start of a boundary) is flushed to the collector, the buffer is
# trimmed to the remainder, and :want_read is returned to ask for more
# input.
#
# Body bytes are counted against the retained-size limit only when
# @body_retained is set.
def handle_mime_body
  if (body_with_boundary = @sbuf.check_until(@body_regex)) # check but do not advance the pointer yet
    # Build the end-anchored variant of the boundary pattern once instead of
    # recompiling it for every completed part.
    @body_regex_at_end ||= /#{@body_regex}\z/m
    body = body_with_boundary.sub(@body_regex_at_end, '') # remove the boundary from the string
    update_retained_size(body.bytesize) if @body_retained
    @collector.on_mime_body @mime_index, body
    # StringScanner#pos is a byte offset, so advance by the body's byte size
    # (not its character count) to stay aligned when the buffer is not
    # binary-encoded.
    @sbuf.pos += body.bytesize + 2 # skip \r\n after the content
    @state = :CONSUME_TOKEN
    @mime_index += 1
  else
    # Save what we have so far
    if @rx_max_size < @sbuf.rest_size
      delta = @sbuf.rest_size - @rx_max_size
      body = @sbuf.peek(delta)
      update_retained_size(body.bytesize) if @body_retained
      @collector.on_mime_body @mime_index, body
      @sbuf.pos += delta
      @sbuf.string = @sbuf.rest
    end
    :want_read
  end
end
handle_mime_head() click to toggle source
# File lib/rack/multipart/parser.rb, line 287
# Parses the header block of the current part once it is fully buffered.
#
# On success the part's header, filename, content type and name are passed
# to the collector, @body_retained records whether the body will count
# against the retained-size limit, and the state becomes :MIME_BODY (which
# is also the return value).
#
# Returns :want_read when the header block is not complete yet; raises
# EOFError when the buffer has already grown past
# MIME_HEADER_BYTESIZE_LIMIT without a complete header.
def handle_mime_head
  unless @sbuf.scan_until(@head_regex)
    # We raise if the mime part header is too large, to avoid unbounded memory
    # buffering. Note that the actual limit is the higher of 64KB and the buffer size (1MB by default)
    raise EOFError, "multipart mime part header too large" if @sbuf.string.bytesize > MIME_HEADER_BYTESIZE_LIMIT

    return :want_read
  end

  head = @sbuf[1]
  content_type = head[MULTIPART_CONTENT_TYPE, 1]

  # Prefer the name from Content-Disposition; fall back to Content-ID.
  disposition_name = head[MULTIPART_CONTENT_DISPOSITION, 1]
  name =
    if disposition_name
      Rack::Auth::Digest::Params.dequote(disposition_name)
    else
      head[MULTIPART_CONTENT_ID, 1]
    end

  filename = get_filename(head)

  # A part without a usable name is keyed by its filename, or by its content
  # type (text/plain when absent) with "[]" appended.
  if name.nil? || name.empty?
    name = filename || "#{content_type || TEXT_PLAIN}[]".dup
  end

  # Mime part head data is retained for both TempfilePart and BufferPart
  # for the entirety of the parse, even though it isn't used for BufferPart.
  update_retained_size(head.bytesize)

  # If a filename is given, a TempfilePart will be used, so the body will
  # not be buffered in memory. However, if a filename is not given, a BufferPart
  # will be used, and the body will be buffered in memory.
  @body_retained = !filename

  @collector.on_mime_head @mime_index, head, filename, content_type, name
  @state = :MIME_BODY
end
run_parser() click to toggle source
# File lib/rack/multipart/parser.rb, line 240
# Drives the state machine over the buffered data: calls the handler for
# the current state repeatedly until a handler asks for more input
# (:want_read) or the state is :DONE.
def run_parser
  loop do
    break if @state == :DONE

    outcome =
      case @state
      when :FAST_FORWARD then handle_fast_forward
      when :CONSUME_TOKEN then handle_consume_token
      when :MIME_HEAD then handle_mime_head
      when :MIME_BODY then handle_mime_body
      end

    break if outcome == :want_read
  end
end
tag_multipart_encoding(filename, content_type, name, body) click to toggle source
# File lib/rack/multipart/parser.rb, line 405
# Tags a part's name and body with a string encoding.
#
# filename     - the part's filename, if any. File parts only get their name
#                tagged as UTF-8 and the method returns nil.
# content_type - the part's Content-Type value, or nil.
# name         - the part's name; converted with to_s and tagged in place.
# body         - the part's body string; tagged in place.
#
# The encoding is UTF-8 unless the content type is TEXT_PLAIN and carries a
# CHARSET parameter, in which case that charset is used (the last one wins).
# The content type comes from the client, so malformed input is tolerated:
# an empty content type, empty parameters and parameters without "=" are
# ignored, and an unknown charset label falls back to binary instead of
# raising.
#
# Returns the body for non-file parts, nil for file parts.
def tag_multipart_encoding(filename, content_type, name, body)
  name = name.to_s
  encoding = Encoding::UTF_8

  name.force_encoding(encoding)

  return if filename

  if content_type
    type_subtype, *params = content_type.split(';')

    if type_subtype && type_subtype.strip == TEXT_PLAIN
      params.each do |param|
        k, v = param.split('=', 2)
        # Skip empty parameters and bare tokens without a value.
        next unless k && v
        next unless k.strip == CHARSET

        v = v.strip
        v = v[1..-2] if v.start_with?('"') && v.end_with?('"')
        encoding = begin
          Encoding.find(v)
        rescue ArgumentError
          # Unknown or empty charset label.
          Encoding::BINARY
        end
      end
    end
  end

  name.force_encoding(encoding)
  body.force_encoding(encoding)
end
update_retained_size(size) click to toggle source
# File lib/rack/multipart/parser.rb, line 347
# Adds +size+ to the running total of retained bytes and raises EOFError
# once that total exceeds BUFFERED_UPLOAD_BYTESIZE_LIMIT. Returns nil
# otherwise.
def update_retained_size(size)
  @retained_size += size
  return unless @retained_size > BUFFERED_UPLOAD_BYTESIZE_LIMIT

  raise EOFError, "multipart data over retained size limit"
end