Yuki & Moto Press

Net::HTTP by Example / Net::HTTP Cheat Sheet (Book Edition)

by August Lilleaas, et al

Contents

Notes

This is the original source reformatted in a single-page book edition (using the Manuscripts format).

See the source repo for how the book gets auto-built with "plain" Jekyll - of course - and hosted on GitHub Pages.

Onwards.

Net::HTTP Basics

Contents

A collection of examples covering common use cases for Ruby’s built-in Net::HTTP library.

Uniform Resource Identifiers (URIs)

require "uri"

# URI.parse turns a string into a URI object (URI::HTTP, URI::HTTPS, ...)
uri = URI.parse("http://mysite.com/some_api")
uri = URI.parse("https://mysite.com/thing?foo=bar")

# URI will also guess the correct port
URI.parse("http://foo.com").port # => 80
URI.parse("https://foo.com/").port # => 443

# Full reference
uri = URI.parse("http://foo.com/this/is/everything?query=params")
# p (uri.methods - Object.methods).sort
p uri.scheme        # => "http"
p uri.host          # => "foo.com"
p uri.port          # => 80
p uri.request_uri   # => "/this/is/everything?query=params"
p uri.path          # => "/this/is/everything"
p uri.query         # => "query=params"

# There are setters as well
uri.port = 8080
uri.host = "google.com"
uri.scheme = "ftp"
p uri.to_s
# => "ftp://google.com:8080/this/is/everything?query=params"

Standard HTTP Request

require "net/http"
require "uri"

uri = URI.parse("http://google.com/")

# Shortcut: one call that performs the GET and hands back the response object
response = Net::HTTP.get_response(uri)

# Will print response.body
Net::HTTP.get_print(uri)

# Full: create the connection object and the request object separately;
# the later sections build on this form (headers, auth, timeouts, ...)
http = Net::HTTP.new(uri.host, uri.port)
response = http.request(Net::HTTP::Get.new(uri.request_uri))

Without URI

# You don't have to use URI.parse: give Net::HTTP.new the host and port,
# and give the request class the path, directly.
require "net/http"

http = Net::HTTP.new("google.com", 80)
response = http.request(Net::HTTP::Get.new("/foo/bar"))

Dealing with Response Objects

require "net/http"
require "uri"

uri = URI.parse("http://google.com/")

http = Net::HTTP.new(uri.host, uri.port)
request = Net::HTTP::Get.new(uri.request_uri)

response = http.request(request)

response.code             # => "301" (the status code is a String, not an Integer)
response.body             # => The body (HTML, XML, blob, whatever)
# Headers are lowercased
response["cache-control"] # => public, max-age=2592000

# Listing all headers (each_header yields the name and the value)
response.each_header { |name, value| puts "#{name} = #{value}" }
# => location = http://www.google.com/
# => content-type = text/html; charset=UTF-8
# => cache-control = public, max-age=2592000
# etc...

Headers

require "net/http"
require "uri"

uri = URI.parse("http://google.com/")
http = Net::HTTP.new(uri.host, uri.port)

# Request headers are set with []= on the request object
request = Net::HTTP::Get.new(uri.request_uri)
request["User-Agent"] = "My Ruby Script"
request["Accept"] = "*/*"

response = http.request(request)

# Get specific header
response["content-type"]
# => "text/html; charset=UTF-8"

# Iterate all response headers.
response.each_header do |key, value|
  p "#{key} => #{value}"
end
# => "location => http://www.google.com/"
# => "content-type => text/html; charset=UTF-8"
# ...

# Alternatively, grab them all at once as a Hash of name => array of values
# (public API; no need to reach into the @header instance variable).
p response.to_hash
# => {"location"=>["http://www.google.com/"], "content-type"=>["text/html; charset=UTF-8"], ...}

Cookies

require "net/http"
require "uri"

uri = URI.parse("http://translate.google.com/")
http = Net::HTTP.new(uri.host, uri.port)

# make first call to get cookies
request = Net::HTTP::Get.new(uri.request_uri)

response = http.request(request)

# save cookies
# A response may carry several Set-Cookie headers, each with attributes
# (Path, Expires, ...). response['set-cookie'] would join them with ", " and
# keep the attributes, which is not a valid Cookie request header - so read
# every header separately and keep only its leading name=value pair.
set_cookie_headers = Array(response.get_fields('set-cookie'))
cookies = set_cookie_headers.map { |header| header.split(';').first }.join('; ')


# make second call
request = Net::HTTP::Get.new('/#auto|en|Pardon')

# add previously stored cookies (skip the header when there is nothing to send)
request['Cookie'] = cookies unless cookies.empty?

response = http.request(request)

cookies = response['set-cookie'] # => nil

Basic Auth

require "net/http"
require "uri"

uri = URI.parse("http://google.com/")

http = Net::HTTP.new(uri.host, uri.port)
request = Net::HTTP::Get.new(uri.request_uri)
# Attach the credentials to this one request (HTTP Basic authentication)
request.basic_auth("username", "password")
response = http.request(request)

Proxy

require 'net/http'
require 'uri'

uri = URI.parse('http://google.com')

# Net::HTTP will automatically create a proxy from the http_proxy environment variable if it is present.
ENV['http_proxy'] # => "http://myproxy.com:8080"

http = Net::HTTP.new(uri.host, uri.port)

# This request uses proxy.
request = Net::HTTP::Get.new(uri.request_uri)
response = http.request(request)

# The class-level shortcuts create their connection through Net::HTTP.new
# as well, so on Ruby 2.0+ this request also goes through the proxy.
response = Net::HTTP.get_response(uri)


# You can pass proxy address to Net::HTTP constructor too.
proxy_uri = URI.parse('http://myproxy.com:8080')

http = Net::HTTP.new(uri.host, uri.port, proxy_uri.host, proxy_uri.port)

request = Net::HTTP::Get.new(uri.request_uri)
response = http.request(request)


# If you are using an authenticated proxy, use Net::HTTP.start method.
# With a block, the connection is finished automatically when the block ends.
Net::HTTP.start(uri.host, uri.port, proxy_uri.host, proxy_uri.port, 'proxy_user', 'proxy_pass') do |http|
  request = Net::HTTP::Get.new(uri.request_uri)
  response = http.request(request)
end

# If you want to reuse Net::HTTP instance, don't forget to finish HTTP connection.
# Without a block, Net::HTTP.start returns the already-opened connection;
# calling #start on it a second time would raise IOError.
http = Net::HTTP.start(uri.host, uri.port, proxy_uri.host, proxy_uri.port, 'proxy_user', 'proxy_pass')

request = Net::HTTP::Get.new(uri.request_uri)
response = http.request(request)

# Finish HTTP connection.
http.finish if http.started?

POST Form Request

require "net/http"
require "uri"

uri = URI.parse("http://example.com/search")

# Shortcut
response = Net::HTTP.post_form(uri, {"q" => "My query", "per_page" => "50"})

# Full control
http = Net::HTTP.new(uri.host, uri.port)

request = Net::HTTP::Post.new(uri.request_uri)
# set_form_data URL-encodes the pairs into the body and sets Content-Type to
# application/x-www-form-urlencoded; leave that header alone so it keeps
# describing the body that is actually sent.
request.set_form_data({"q" => "My query", "per_page" => "50"})

# Tweak other headers as needed
request["Accept"] = "application/json"

response = http.request(request)

File Upload - HTML Style (w/ input type=”file”)

require "net/http"
require "uri"

# Token used to terminate the file in the post body. Make sure it is not
# present in the file you're uploading.
BOUNDARY = "AaB03x"

uri = URI.parse("http://something.com/uploads")
file = "/path/to/your/testfile.txt"

# Hand-built multipart body: one part holding the file, then the closing boundary
post_body = []
post_body << "--#{BOUNDARY}\r\n"
post_body << "Content-Disposition: form-data; name=\"datafile\"; filename=\"#{File.basename(file)}\"\r\n"
post_body << "Content-Type: text/plain\r\n"
post_body << "\r\n"
post_body << File.read(file)
post_body << "\r\n--#{BOUNDARY}--\r\n"

http = Net::HTTP.new(uri.host, uri.port)
request = Net::HTTP::Post.new(uri.request_uri)
request.body = post_body.join
# Media-type parameters are separated by ";" (not ",")
request["Content-Type"] = "multipart/form-data; boundary=#{BOUNDARY}"

http.request(request)

# Alternative method, using Nick Sieger's multipart-post gem
require "rubygems"
require "net/http/post/multipart"

request = Net::HTTP::Post::Multipart.new uri.request_uri, "file" => UploadIO.new(file, "application/octet-stream")
http = Net::HTTP.new(uri.host, uri.port)
http.request(request)

# Another alternative, using Rack 1.3 +
require 'rack'
uri     = URI.parse("http://something.com/uploads")
http    = Net::HTTP.new(uri.host, uri.port)
request = Net::HTTP::Post.new(uri.request_uri)

request.body = Rack::Multipart::Generator.new(
  "form_text_field" => "random text here",
  "file"            => Rack::Multipart::UploadedFile.new(file, "text/plain")
).dump
request.content_type = "multipart/form-data; boundary=#{Rack::Multipart::MULTIPART_BOUNDARY}"

http.request(request)

# Or, instead of the one-shot call above, send it over an explicitly opened
# connection (handy when several requests should share one connection).
http.start do |connection|
  response = connection.request(request)
end

SSL/HTTPS Request

require "net/https"
require "uri"

# A regular-ish https request.
#
# ssltest7.bbtest.net is Thawte's SSL test site. Net::HTTP will use the CA
# certificates installed on your system by default, which most likely includes
# the Thawte cert that signed ssltest7.bbtest.net.
http = Net::HTTP.new("ssltest7.bbtest.net", 443)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_PEER

response = http.request(Net::HTTP::Get.new("/"))
response.body
response.code # the status code, as a String (Net::HTTPResponse has no #status)
# .. do normal Net::HTTP response stuff here (see separate cheat sheet entry)

# You can specify custom CA certs. If your production system only connects to
# one particular server, you should specify these, and bundle them with your
# app, so that you don't depend on OS-level pre-installed certificates in the
# production environment.
http = Net::HTTP.new("verysecure.com", 443)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_PEER

store = OpenSSL::X509::Store.new
store.set_default_paths # Optional method that will auto-include the system CAs.
store.add_cert(OpenSSL::X509::Certificate.new(File.read("/path/to/ca1.crt")))
store.add_cert(OpenSSL::X509::Certificate.new(File.read("/path/to/ca2.crt")))
store.add_file("/path/to/ca3.crt") # Alternative syntax for adding certs.
http.cert_store = store

response = http.request(Net::HTTP::Get.new("/"))


# Client certificate example. Some servers use this to authorize the connecting
# client, i.e. you. The server you connect to gets the certificate you specify,
# and they can use it to check who signed the certificate, and use the
# certificate fingerprint to identify exactly which certificate you're using.
http = Net::HTTP.new("verysecure.com", 443)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_PEER
http.key = OpenSSL::PKey::RSA.new(File.read("/path/to/client.key"), "optional passphrase argument")
http.cert = OpenSSL::X509::Certificate.new(File.read("/path/to/client.crt"))

response = http.request(Net::HTTP::Get.new("/"))


# You can also skip verification. This is almost certainly a bad idea, read more
# here:
# http://www.rubyinside.com/how-to-cure-nethttps-risky-default-https-behavior-4010.html
http.verify_mode = OpenSSL::SSL::VERIFY_NONE

HTTP POST / GET / PUT / DELETE Methods

# Basic REST.
# Most REST APIs will set semantic values in response.body and response.code.
require "net/http"

http = Net::HTTP.new("api.restsite.com")

# POST: create
request = Net::HTTP::Post.new("/users")
request.set_form_data({"users[login]" => "quentin"})
# or
request.body = '{"username":"quentin"}'
request["Content-Type"] = "application/json" # label the raw body; set_form_data only covers form data
response = http.request(request) # Use nokogiri, hpricot, etc to parse response.body.


# GET: read
request = Net::HTTP::Get.new("/users/1")
response = http.request(request) # As with POST, the data is in response.body.


# PUT: update
request = Net::HTTP::Put.new("/users/1")
request.set_form_data({"users[login]" => "changed"})
response = http.request(request)


# DELETE: remove
request = Net::HTTP::Delete.new("/users/1")
response = http.request(request)

Your Own Custom HTTP Method / Verb

require "net/http"

# Varnish uses a custom PURGE verb. A simple subclass is all it takes for
# Net::HTTP to send requests with this method.

# Request class for the PURGE verb; neither the request nor the response
# is declared to carry a body.
class Purge < Net::HTTPRequest
  METHOD            = 'PURGE'
  REQUEST_HAS_BODY  = false
  RESPONSE_HAS_BODY = false
end

# Pass the port as an Integer, like every other Net::HTTP.new call in this sheet.
http = Net::HTTP.new("localhost", 80)
response = http.request(Purge.new("/"))

Timeout

require "net/http"
require "uri"

uri = URI.parse("http://google.com/")

# Timeouts are set on the connection object before the request is sent
http = Net::HTTP.new(uri.host, uri.port)
http.open_timeout = 3 # in seconds
http.read_timeout = 3 # in seconds
http.request(Net::HTTP::Get.new(uri.request_uri))

Logging and Debugging

require "net/http"
require "uri"

uri = URI.parse("http://google.com/")
http = Net::HTTP.new(uri.host, uri.port)

# Pick one destination for the wire trace - the calls below are alternatives.
# NOTE: the Ruby docs warn against set_debug_output in production code; the
# trace exposes everything sent over the connection.
http.set_debug_output($stdout)
# or
http.set_debug_output($stderr)
# or
require "logger"
http.set_debug_output(Logger.new("/path/to/my.log"))

response = http.request(Net::HTTP::Get.new(uri.request_uri))

Asynchronous

# All the APIs in Net::HTTP are synchronous.
# We have to use threads.

require "net/http"
require "uri"

# Note: join blocks until the thread has finished, so do any other work
# before calling it. `response` is local to the block; Thread#value (instead
# of join) waits for the thread and returns the block's result.
Thread.new do
  # Do normal Net::HTTP stuff here.
  uri = URI.parse("http://google.com/")
  http = Net::HTTP.new(uri.host, uri.port)
  response = http.request(Net::HTTP::Get.new(uri.request_uri))
end.join

Case Study - Fetcher HTTP Library (Incl. Redirects, Caching 'n' More)

Contents

All together now: the fetcher library/gem (web: rubylibs/fetcher, gem: fetcher) fetches text documents or binary blobs via HTTP or HTTPS. Usage examples:

Copy (to File)

# Module-level shortcut: source URL first, destination path second
Fetcher.copy( 'https://raw.github.com/openfootball/at-austria/master/2013_14/bl.txt', '/tmp/bl.txt' )

# -or-

# Worker#copy raises Fetcher::HttpError unless the response code is '200'
worker = Fetcher::Worker.new
worker.copy( 'https://raw.github.com/openfootball/at-austria/master/2013_14/bl.txt', '/tmp/bl.txt' )

Read (into String)

# Module-level shortcut
txt = Fetcher.read( 'https://raw.github.com/openfootball/at-austria/master/2013_14/bl.txt' )

# -or-

# Worker#read returns a copy of the response body; it raises
# Fetcher::HttpError unless the response code is '200'
worker = Fetcher::Worker.new
txt = worker.read( 'https://raw.github.com/openfootball/at-austria/master/2013_14/bl.txt' )

Note: The method read will return a string.

Get (HTTP Response)

# Module-level shortcut
response = Fetcher.get( 'https://raw.github.com/openfootball/at-austria/master/2013_14/bl.txt' )

# -or-

# Worker#get hands back the response whatever its status code (no exception
# for non-200 responses)
worker = Fetcher::Worker.new
response = worker.get( 'https://raw.github.com/openfootball/at-austria/master/2013_14/bl.txt' )

Note: The method get will return a Net::HTTPResponse object (lets you use code, headers, body, etc.).

# Inspecting the response returned by get
puts response.code             # => '404'
                               #  Note: Returned (status) code is a string e.g. '404'
puts response.message          # => 'Not Found'
puts response.body
puts response.content_type     # => 'text/html; charset=UTF-8'
puts response['content-type']  # => 'text/html; charset=UTF-8'
                               #  Note: Headers are always downcased
                               #        e.g. use 'content-type' not 'Content-Type'

Command Line

fetch version 0.5.0 - Lets you fetch text documents or binary blobs via HTTP, HTTPS.

Usage: fetch [options] URI
    -o, --output PATH                Output Path (default is '.')
    -v, --verbose                    Show debug trace


Examples:
  fetch https://raw.github.com/openfootball/at-austria/master/2013_14/bl.txt
  fetch -o downloads https://raw.github.com/openfootball/at-austria/master/2013_14/bl.txt

Source

module Fetcher

  # Base class for all errors raised by the fetcher library.
  class Error < StandardError; end

  # Raised for an HTTP response that is not OK; carries the status code and
  # the status message of that response.
  class HttpError < Error
    attr_reader :code, :message

    def initialize( code, message )
      @code    = code
      @message = message
    end

    # Full one-line description (status code plus status message).
    def to_s
      "HTTP request failed (NOK) => #{code} #{message}"
    end
  end


  # Fetches a document with HTTP GET (http or https), following redirects and
  # optionally sending conditional-GET headers taken from an in-memory cache.
  #
  # #get returns the response whatever its status code; #read, #read_blob!,
  # #read_utf8! and #copy raise HttpError unless the final status code is '200'.
  class Worker

    include LogUtils::Logging

    # Number of requests a single fetch may issue while following redirects.
    REDIRECT_LIMIT = 6

    # Status codes that are answered by following the Location header:
    #   301 = moved permanently, 302 = found, 303 = see other,
    #   307 = temporary redirect, 308 = permanent redirect
    REDIRECT_CODES = %w[301 302 303 307 308].freeze

    # Content types that #copy writes in binary mode ('wb'); text mode would
    # translate newlines on Windows and corrupt such payloads.
    BINARY_CONTENT_TYPE = /image|audio|video|zip|pdf|octet-stream/

    def initialize
      ### cache for conditional get (e.g. etags and last-modified headers/checks)
      ##  note: cache[ uri ] is a hash; its 'etag' and 'last-modified' values
      ##        are sent back as If-None-Match / If-Modified-Since headers
      @cache      = {}
      @use_cache  = false
      ## SECURITY: https certificates are NOT verified unless this is switched
      ##  on; off stays the default only to keep existing callers working
      @verify_ssl = false
    end

    attr_reader :cache

    def clear_cache()               @cache = {};                 end
    def use_cache=(true_or_false)   @use_cache = true_or_false;  end  # true|false
    def use_cache?()                @use_cache;                  end
    def verify_ssl=(true_or_false)  @verify_ssl = true_or_false; end  # true|false
    def verify_ssl?()               @verify_ssl;                 end


    # Returns the HTTP response (code, message, body, etc.) for src;
    # does not raise for non-200 responses.
    def get( src )
      logger.debug "fetch - get(_response) src: #{src}"

      get_response( src )
    end


    # Returns a copy of the response body as an (ascii/binary) string.
    # Raises HttpError unless the response code is '200'.
    def read( src )
      logger.debug "fetch - copy src: #{src} into string"

      get_ok_response( src ).body.dup
    end

    def read_blob!( src )
      ## note: same as read for now
      read( src )
    end

    # Returns the response body tagged as UTF-8.
    # Raises HttpError unless the response code is '200'.
    def read_utf8!( src )
      logger.debug "fetch - copy src: #{src} into utf8 string"

      response = get_ok_response( src )

      ###
      # Note: Net::HTTP will NOT set encoding UTF-8 etc.
      # will be set to ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
      # thus, set/force encoding to utf-8 (bytes are relabelled, not converted)
      response.body.to_s.force_encoding( Encoding::UTF_8 )
    end


    # Writes the response body for src to the file dest.
    # Pass opts[:mode] to force a File.open mode; otherwise 'wb' is used for
    # content types matching BINARY_CONTENT_TYPE and 'w' for everything else.
    # Raises HttpError unless the response code is '200' (no file is written).
    def copy( src, dest, opts={} )
      ## todo: add file protocol - why? why not??

      logger.debug "fetch - copy src: #{src} to dest: #{dest}"

      response = get_ok_response( src )

      if response.content_type.to_s =~ BINARY_CONTENT_TYPE
        logger.debug '  switching to binary'
        mode = 'wb'
      else
        mode = 'w'
      end

      mode = opts[:mode]  if opts[:mode]  # if mode flags passed in -take precedence

      File.open( dest, mode ) do |f|
        f.write( response.body )
      end
    end

    # Issues the GET for src and follows redirects until a non-redirect
    # response arrives; that response is returned whatever its code.
    # Raises ArgumentError once REDIRECT_LIMIT requests have been used up.
    def get_response( src )
      uri        = URI.parse( src )
      http_class = proxy_http_class

      redirect_limit = REDIRECT_LIMIT
      response = nil

      loop do
        raise ArgumentError, 'HTTP redirect too deep' if redirect_limit == 0
        redirect_limit -= 1

        http = http_class.new( uri.host, uri.port )

        logger.debug "GET #{uri.request_uri} uri=#{uri}, redirect_limit=#{redirect_limit}"

        request = Net::HTTP::Get.new( uri.request_uri, request_headers( uri ) )
        if uri.is_a?( URI::HTTPS )
          http.use_ssl = true
          ## FIXME(security): VERIFY_NONE accepts any certificate; switch
          ##  verify_ssl on to get real verification
          http.verify_mode = verify_ssl? ? OpenSSL::SSL::VERIFY_PEER : OpenSSL::SSL::VERIFY_NONE
        end

        response = http.request( request )

        if response.code == '200'
          logger.debug "#{response.code} #{response.message}"
          logger.debug "  content_type: #{response.content_type}, content_length: #{response.content_length}"
          break  # will return response
        elsif response.code == '304' # -- Not Modified - for conditional GETs (using etag,last-modified)
          logger.debug "#{response.code} #{response.message}"
          break  # will return response
        elsif REDIRECT_CODES.include?( response.code )
          location = response['location']
          logger.debug "#{response.code} #{response.message} location=#{location}"

          if location.nil? || location.empty?
            # nothing to follow; hand the redirect response back to the caller
            puts "*** error - fetch HTTP - #{response.code} #{response.message} (no location header)"
            break  # will return response
          end

          newuri = URI.parse( location )
          if newuri.relative?
            logger.debug "url relative; try to make it absolute"
            newuri = uri + location
          end
          uri = newuri
        else
          puts "*** error - fetch HTTP - #{response.code} #{response.message}"
          break  # will return response
        end
      end

      response
    end # method get_response


    private

    # Like get_response, but raises HttpError unless the code is '200'.
    def get_ok_response( src )
      response = get_response( src )
      raise HttpError.new( response.code, response.message )  if response.code != '200'
      response
    end

    # Returns the class used to open connections: a proxy-enabled Net::HTTP
    # class when HTTP_PROXY (or http_proxy) is set, plain Net::HTTP otherwise.
    def proxy_http_class
      setting = ENV['HTTP_PROXY'] || ENV['http_proxy']   # upper-case name wins

      if setting
        proxy = URI.parse( setting )
        logger.debug "using net http proxy: proxy.host=#{proxy.host}, proxy.port=#{proxy.port}"
        if proxy.user && proxy.password
          logger.debug "  using credentials: proxy.user=#{proxy.user}, proxy.password=****"
        else
          logger.debug "  using no credentials"
        end
        Net::HTTP::Proxy( proxy.host, proxy.port, proxy.user, proxy.password )
      else
        logger.debug "using direct net http access; no proxy configured"
        Net::HTTP
      end
    end

    # Builds the request headers for uri: the User-Agent plus, when caching is
    # switched on and an entry exists, the conditional-GET headers.
    def request_headers( uri )
      headers = { 'User-Agent' => "fetcher gem v#{VERSION}" }
      return headers  unless use_cache?

      ## check for existing cache entry in cache store (lookup by uri)
      ## todo/fix: normalize uri!!!! - how?
      ##  - remove query_string ?? fragement ?? why? why not??
      ## note:  using uri.to_s  should return full uri e.g. http://example.com/page.html
      cache_entry = cache[ uri.to_s ]
      return headers  unless cache_entry

      logger.info "found cache entry for >#{uri.to_s}<"
      if cache_entry['etag']
        logger.info "adding header If-None-Match (etag) >#{cache_entry['etag']}< for conditional GET"
        headers['If-None-Match'] = cache_entry['etag']
      end
      if cache_entry['last-modified']
        logger.info "adding header If-Modified-Since (last-modified) >#{cache_entry['last-modified']}< for conditional GET"
        headers['If-Modified-Since'] = cache_entry['last-modified']
      end
      headers
    end

  end # class Worker

end  # module Fetcher

(Source: rubylibs/fetcher/lib/fetcher/worker.rb)

Fork me on GitHub