This is the original source reformatted in a single-page book edition (using the Manuscripts format).
See the source repo for how the book gets auto-built with "plain" Jekyll - of course - and hosted on GitHub Pages.
Onwards.
A collection of examples covering common use cases of Ruby’s built-in Net::HTTP
library.
# Parsing, inspecting and modifying URIs with the standard library's URI module.
require "uri"
uri = URI.parse("http://mysite.com/some_api")
uri = URI.parse("https://mysite.com/thing?foo=bar")
# URI will also guess the correct port
URI.parse("http://foo.com").port # => 80
URI.parse("https://foo.com/").port # => 443
# Full reference
uri = URI.parse("http://foo.com/this/is/everything?query=params")
# p (uri.methods - Object.methods).sort
p uri.scheme # => "http"
p uri.host # => "foo.com"
p uri.port # => 80
p uri.request_uri # => "/this/is/everything?query=params"
p uri.path # => "/this/is/everything"
p uri.query # => "query=params"
# There are setters as well
uri.port = 8080
uri.host = "google.com"
uri.scheme = "ftp"
# The path and query are untouched by the setters above, so they appear unchanged.
p uri.to_s
# => "ftp://google.com:8080/this/is/everything?query=params"
# Three ways to issue a plain GET request with Net::HTTP.
require "net/http"
require "uri"
uri = URI.parse("http://google.com/")
# Shortcut: performs the request in one call and returns the response object
response = Net::HTTP.get_response(uri)
# Will print response.body
Net::HTTP.get_print(uri)
# Full: create the connection object and the request object separately
http = Net::HTTP.new(uri.host, uri.port)
response = http.request(Net::HTTP::Get.new(uri.request_uri))
# You don't have to use URI.parse
# (host, port and request path can be given directly)
require "net/http"
http = Net::HTTP.new("google.com", 80)
response = http.request(Net::HTTP::Get.new("/foo/bar"))
# Inspecting the response object returned by Net::HTTP#request.
require "net/http"
require "uri"
uri = URI.parse("http://google.com/")
http = Net::HTTP.new(uri.host, uri.port)
request = Net::HTTP::Get.new(uri.request_uri)
response = http.request(request)
response.code # => "301" (the status code is a String, not an Integer)
response.body # => The body (HTML, XML, blob, whatever)
# Headers are lowercased
response["cache-control"] # => public, max-age=2592000
# Listing all headers
# (do_something is a placeholder for your own code; it is not defined here)
response.each_header { |h| do_something(h, response[h]) } # => location = http://www.google.com/
# => content-type = text/html; charset=UTF-8
# => cache-control = public, max-age=2592000
# etc...
# Setting request headers and reading response headers.
require "net/http"
require "uri"
uri = URI.parse("http://google.com/")
http = Net::HTTP.new(uri.host, uri.port)
request = Net::HTTP::Get.new(uri.request_uri)
# Request headers are assigned with hash-style syntax before the request is sent.
request["User-Agent"] = "My Ruby Script"
request["Accept"] = "*/*"
response = http.request(request)
# Get specific header
response["content-type"]
# => "text/html; charset=UTF-8"
# Iterate all response headers.
response.each_header do |key, value|
p "#{key} => #{value}"
end
# => "location => http://www.google.com/"
# => "content-type => text/html; charset=UTF-8"
# ...
# Alternatively, reach into private APIs.
# NOTE: response.to_hash is the public way to obtain an equivalent Hash of
# header name => array of values; prefer it over reading instance variables.
p response.instance_variable_get("@header")
# => {"location"=>["http://www.google.com/"], "content-type"=>["text/html; charset=UTF-8"], ...}
# Carrying cookies from one response over to the next request.
require "net/http"
require "uri"
uri = URI.parse("http://translate.google.com/")
http = Net::HTTP.new(uri.host, uri.port)
# make first call to get cookies
request = Net::HTTP::Get.new(uri.request_uri)
response = http.request(request)
# save cookies
# get_fields returns one entry per Set-Cookie header (or nil when there is none).
# Only the leading "name=value" pair of each entry belongs in a Cookie request
# header; attributes such as Path or Expires are instructions for the client and
# must not be echoed back to the server.
set_cookie_fields = response.get_fields('set-cookie') || []
cookies = set_cookie_fields.map { |field| field.split(';', 2).first }.compact.join('; ')
# make second call
request = Net::HTTP::Get.new('/#auto|en|Pardon')
# add previously stored cookies (skip the header entirely when nothing was set)
request['Cookie'] = cookies unless cookies.empty?
response = http.request(request)
# Read headers directly from the response object; the older
# response.response accessor is an obsolete alias for the response itself.
response['set-cookie'] # => nil
# Sending HTTP Basic authentication credentials with a request.
require "net/http"
require "uri"
uri = URI.parse("http://google.com/")
http = Net::HTTP.new(uri.host, uri.port)
request = Net::HTTP::Get.new(uri.request_uri)
# basic_auth adds the Authorization header to this request only.
request.basic_auth("username", "password")
response = http.request(request)
# Sending requests through an HTTP proxy.
require 'net/http'
require 'uri'
uri = URI.parse('http://google.com')
# Net::HTTP will automatically create a proxy from the http_proxy environment variable if it is present.
ENV['http_proxy'] # => "http://myproxy.com:8080"
http = Net::HTTP.new(uri.host, uri.port)
# This request uses proxy.
request = Net::HTTP::Get.new(uri.request_uri)
response = http.request(request)
# But it does not work without a Net::HTTP object.
# This request does not use proxy.
# NOTE(review): this statement reflects the Ruby version the example was written
# for; verify it against the Ruby version you are running.
response = Net::HTTP.get_response(uri)
# You can pass proxy address to Net::HTTP constructor too.
proxy_uri = URI.parse('http://myproxy.com:8080')
http = Net::HTTP.new(uri.host, uri.port, proxy_uri.host, proxy_uri.port)
request = Net::HTTP::Get.new(uri.request_uri)
response = http.request(request)
# If you are using an authenticated proxy, use Net::HTTP.start method.
# With a block, the connection is closed automatically when the block returns.
Net::HTTP.start(uri.host, uri.port, proxy_uri.host, proxy_uri.port, 'proxy_user', 'proxy_pass') do |http|
  request = Net::HTTP::Get.new(uri.request_uri)
  response = http.request(request)
end
# If you want to reuse Net::HTTP instance, don't forget to finish HTTP connection.
# Build the object with Net::HTTP.new and open it once with #start; calling
# #start on an already started session raises IOError.
http = Net::HTTP.new(uri.host, uri.port, proxy_uri.host, proxy_uri.port, 'proxy_user', 'proxy_pass').start
request = Net::HTTP::Get.new(uri.request_uri)
response = http.request(request)
# Finish HTTP connection.
http.finish if http.started?
# Sending a form POST, either with the one-line shortcut or with a request object.
require "net/http"
require "uri"
uri = URI.parse("http://example.com/search")
# Shortcut
response = Net::HTTP.post_form(uri, {"q" => "My query", "per_page" => "50"})
# Full control
http = Net::HTTP.new(uri.host, uri.port)
request = Net::HTTP::Post.new(uri.request_uri)
request.set_form_data({"q" => "My query", "per_page" => "50"})
# Tweak headers, removing this will default to application/x-www-form-urlencoded
# NOTE: overriding the header does not re-encode the body; the body produced by
# set_form_data stays form-encoded. Only change the Content-Type when it matches
# what you actually send (e.g. assign a JSON string to request.body instead).
request["Content-Type"] = "application/json"
response = http.request(request)
# Uploading a file as multipart/form-data: by hand, with the multipart-post gem,
# and with Rack's multipart generator.
require "net/http"
require "uri"
# Token used to terminate the file in the post body. Make sure it is not
# present in the file you're uploading.
BOUNDARY = "AaB03x"
uri = URI.parse("http://something.com/uploads")
file = "/path/to/your/testfile.txt"
# Each part starts with "--BOUNDARY", followed by the part headers, an empty
# line and the content; the body is closed with "--BOUNDARY--".
post_body = []
post_body << "--#{BOUNDARY}\r\n"
post_body << "Content-Disposition: form-data; name=\"datafile\"; filename=\"#{File.basename(file)}\"\r\n"
post_body << "Content-Type: text/plain\r\n"
post_body << "\r\n"
post_body << File.read(file)
post_body << "\r\n--#{BOUNDARY}--\r\n"
http = Net::HTTP.new(uri.host, uri.port)
request = Net::HTTP::Post.new(uri.request_uri)
request.body = post_body.join
# Media type parameters are separated from the type by a semicolon, not a comma.
request["Content-Type"] = "multipart/form-data; boundary=#{BOUNDARY}"
http.request(request)
# Alternative method, using Nick Sieger's multipart-post gem
require "rubygems"
require "net/http/post/multipart"
request = Net::HTTP::Post::Multipart.new uri.request_uri, "file" => UploadIO.new(file, "application/octet-stream")
http = Net::HTTP.new(uri.host, uri.port)
http.request(request)
# Another alternative, using Rack 1.3 +
require 'rack'
uri = URI.parse("http://something.com/uploads")
http = Net::HTTP.new(uri.host, uri.port)
request = Net::HTTP::Post.new(uri.request_uri)
# path_to_file and file_mime_type are placeholders; set them to the file you
# want to upload and its MIME type before running this.
request.body = Rack::Multipart::Generator.new(
  "form_text_field" => "random text here",
  "file" => Rack::Multipart::UploadedFile.new(path_to_file, file_mime_type)
).dump
request.content_type = "multipart/form-data; boundary=#{Rack::Multipart::MULTIPART_BOUNDARY}"
http.request(request)
# retrying_request is a placeholder for your own helper (it is not defined in
# this document); it is expected to send the request over the open connection.
http.start do |connection|
  response = retrying_request(connection, request)
end
# HTTPS requests: default CA verification, custom CA certificates, client
# certificates, and (discouraged) disabling verification.
require "net/https"
require "uri"
# A regular-ish https request.
#
# ssltest7.bbtest.net is Thawte's SSL test site. Net::HTTP will use the CA
# certificates installed on your system by default, which most likely includes
# the Thawte cert that signed ssltest7.bbtest.net.
http = Net::HTTP.new("ssltest7.bbtest.net", 443)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_PEER
response = http.request(Net::HTTP::Get.new("/"))
response.body
# The status is exposed as #code (a String such as "200"); Net::HTTPResponse
# has no #status method.
response.code
# .. do normal Net::HTTP response stuff here (see separate cheat sheet entry)
# You can specify custom CA certs. If your production system only connects to
# one particular server, you should specify these, and bundle them with your
# app, so that you don't depend on OS-level pre-installed certificates in the
# production environment.
http = Net::HTTP.new("verysecure.com", 443)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_PEER
store = OpenSSL::X509::Store.new
store.set_default_paths # Optional method that will auto-include the system CAs.
store.add_cert(OpenSSL::X509::Certificate.new(File.read("/path/to/ca1.crt")))
store.add_cert(OpenSSL::X509::Certificate.new(File.read("/path/to/ca2.crt")))
store.add_file("/path/to/ca3.crt") # Alternative syntax for adding certs.
http.cert_store = store
response = http.request(Net::HTTP::Get.new("/"))
# Client certificate example. Some servers use this to authorize the connecting
# client, i.e. you. The server you connect to gets the certificate you specify,
# and they can use it to check who signed the certificate, and use the
# certificate fingerprint to identify exactly which certificate you're using.
http = Net::HTTP.new("verysecure.com", 443)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_PEER
http.key = OpenSSL::PKey::RSA.new(File.read("/path/to/client.key"), "optional passphrase argument")
http.cert = OpenSSL::X509::Certificate.new(File.read("/path/to/client.crt"))
response = http.request(Net::HTTP::Get.new("/"))
# You can also skip verification. This is almost certainly a bad idea, read more
# here:
# http://www.rubyinside.com/how-to-cure-nethttps-risky-default-https-behavior-4010.html
# Without verification the peer's certificate is not checked at all.
http.verify_mode = OpenSSL::SSL::VERIFY_NONE
# Basic REST.
# Most REST APIs will set semantic values in response.body and response.code.
# One request class per verb: Post, Get, Put and Delete, all sent over the same
# Net::HTTP object.
require "net/http"
http = Net::HTTP.new("api.restsite.com")
request = Net::HTTP::Post.new("/users")
request.set_form_data({"users[login]" => "quentin"})
# or
# (the two are alternatives: assigning request.body replaces the form data set above)
request.body = '{"username":"quentin"}'
response = http.request(request) # Use nokogiri, hpricot, etc to parse response.body.
request = Net::HTTP::Get.new("/users/1")
response = http.request(request) # As with POST, the data is in response.body.
request = Net::HTTP::Put.new("/users/1")
request.set_form_data({"users[login]" => "changed"})
response = http.request(request)
request = Net::HTTP::Delete.new("/users/1")
response = http.request(request)
require "net/http"
# Varnish uses a custom PURGE verb. A simple subclass is all it takes for
# Net::HTTP to send requests with this method.
class Purge < Net::HTTPRequest
  # Verb written on the request line.
  METHOD = "PURGE"
  # Neither the request nor the response is expected to carry a body.
  REQUEST_HAS_BODY = false
  RESPONSE_HAS_BODY = false
end
# The port is given as an Integer, as in every other example in this document.
http = Net::HTTP.new("localhost", 80)
response = http.request(Purge.new("/"))
# Limiting how long a request may wait for the connection and for data.
require "net/http"
require "uri"
uri = URI.parse("http://google.com/")
http = Net::HTTP.new(uri.host, uri.port)
# Both values must be set before the request is sent.
http.open_timeout = 3 # in seconds
http.read_timeout = 3 # in seconds
http.request(Net::HTTP::Get.new(uri.request_uri))
# Tracing the HTTP conversation for debugging.
require "net/http"
require "uri"
uri = URI.parse("http://google.com/")
http = Net::HTTP.new(uri.host, uri.port)
# Pick ONE of the following targets; each call replaces the previous one, so as
# written only the Logger receives output.
http.set_debug_output($stdout)
# or
http.set_debug_output($stderr)
# or
require "logger"
http.set_debug_output(Logger.new("/path/to/my.log"))
response = http.request(Net::HTTP::Get.new(uri.request_uri))
# All the APIs in Net::HTTP are synchronous.
# We have to use threads.
require "net/http"
require "uri"
# The request runs on its own thread; the trailing .join makes the calling
# thread wait until the block has finished.
Thread.new do
# Do normal Net::HTTP stuff here.
uri = URI.parse("http://google.com/")
http = Net::HTTP.new(uri.host, uri.port)
response = http.request(Net::HTTP::Get.new(uri.request_uri))
end.join
All together now: the fetcher library/gem (web: rubylibs/fetcher, gem: fetcher) fetches text documents or binary blobs via HTTP or HTTPS. Usage examples:
# Download a document to a local file (copy) or into a string (read).
# NOTE(review): the module-level Fetcher.copy / Fetcher.read shortcuts are not
# part of the worker source shown further down; presumably they delegate to a
# Fetcher::Worker instance - confirm against the gem.
Fetcher.copy( 'https://raw.github.com/openfootball/at-austria/master/2013_14/bl.txt', '/tmp/bl.txt' )
# -or-
worker = Fetcher::Worker.new
worker.copy( 'https://raw.github.com/openfootball/at-austria/master/2013_14/bl.txt', '/tmp/bl.txt' )
txt = Fetcher.read( 'https://raw.github.com/openfootball/at-austria/master/2013_14/bl.txt' )
# -or-
worker = Fetcher::Worker.new
txt = worker.read( 'https://raw.github.com/openfootball/at-austria/master/2013_14/bl.txt' )
Note: The `read` method returns the response body as a string.
# Fetch a document and keep the whole response object instead of just the body.
response = Fetcher.get( 'https://raw.github.com/openfootball/at-austria/master/2013_14/bl.txt' )
# -or-
worker = Fetcher::Worker.new
response = worker.get( 'https://raw.github.com/openfootball/at-austria/master/2013_14/bl.txt' )
Note: The `get` method returns a `Net::HTTPResponse` object,
which gives you access to the status code, message, headers, body, etc.
# Working with the response object returned by get.
puts response.code # => '404'
# Note: Returned (status) code is a string e.g. '404'
puts response.message # => 'Not Found'
puts response.body
puts response.content_type # => 'text/html; charset=UTF-8'
puts response['content-type'] # => 'text/html; charset=UTF-8'
# Note: Headers are always downcased
# e.g. use 'content-type' not 'Content-Type'
fetch version 0.5.0 - Lets you fetch text documents or binary blobs via HTTP, HTTPS.
Usage: fetch [options] URI
-o, --output PATH Output Path (default is '.')
-v, --verbose Show debug trace
Examples:
fetch https://raw.github.com/openfootball/at-austria/master/2013_14/bl.txt
fetch -o downloads https://raw.github.com/openfootball/at-austria/master/2013_14/bl.txt
module Fetcher

  # Base class for every error raised by this library.
  class Error < StandardError
  end

  # Raised by Worker#read, #read_blob!, #read_utf8! and #copy when the final
  # response status is anything other than '200'.
  class HttpError < Error
    # code is the status string taken from the response (e.g. '404');
    # message is the matching reason phrase (e.g. 'Not Found').
    attr_reader :code, :message

    def initialize( code, message )
      @code, @message = code, message
    end

    def to_s
      "HTTP request failed (NOK) => #{@code} #{@message}"
    end
  end

  # Performs HTTP/HTTPS GET requests (following redirects, optionally through
  # a proxy) and offers helpers to read the result into a string or copy it
  # into a file.
  class Worker
    include LogUtils::Logging

    # Maximum number of requests (the initial one plus followed redirects)
    # issued by a single get_response call.
    REDIRECT_LIMIT = 6

    # Status codes that are followed using the Location header:
    #   301 = moved permanently, 302 = found, 303 = see other,
    #   307 = temporary redirect, 308 = permanent redirect
    REDIRECT_CODES = %w[301 302 303 307 308].freeze

    def initialize
      ### cache for conditional get (e.g. etags and last-modified headers/checks)
      @cache = {}
      @use_cache = false
    end

    ## note: use cache[ uri ] = hash for headers+plus body+plus code(410,etc.)
    #    cache[ uri ]
    #  The worker only READS entries (keys 'etag' and 'last-modified', looked
    #  up by uri.to_s); nothing in this class stores them, so entries have to
    #  be added by the caller through #cache.
    def clear_cache() @cache = {}; end
    def cache() @cache; end
    def use_cache=(true_or_false) @use_cache=true_or_false; end # true|false
    def use_cache?() @use_cache; end

    # Fetches src and returns the Net::HTTPResponse (code, message, body, etc.)
    # whatever its status; no exception is raised for non-200 responses.
    def get( src )
      logger.debug "fetch - get(_response) src: #{src}"
      get_response( src )
    end

    # Fetches src and returns a copy of the response body as an
    # (ascii/binary) string.
    #
    # Raises HttpError when the response status is not '200'.
    def read( src )
      logger.debug "fetch - copy src: #{src} into string"
      fetch_ok_response( src ).body.dup
    end

    ## note: same as read for now
    def read_blob!( src )
      read( src )
    end

    # Fetches src and returns the response body tagged as UTF-8.
    #
    # Raises HttpError when the response status is not '200'.
    def read_utf8!( src )
      logger.debug "fetch - copy src: #{src} into utf8 string"
      response = fetch_ok_response( src )

      ###
      # Note: Net::HTTP will NOT set encoding UTF-8 etc.
      #   will be set to ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
      #   thus, set/force encoding to utf-8 (bytes are relabelled, not converted)
      response.body.to_s.force_encoding( Encoding::UTF_8 )
    end

    # Fetches src and writes the response body to the file dest.
    #
    # opts[:mode] - optional File.open mode; when given it takes precedence
    #               over the mode derived from the content type.
    #
    # Raises HttpError when the response status is not '200'; in that case
    # no file is written.
    def copy( src, dest, opts={} )
      ## todo: add file protocol - why? why not??
      logger.debug "fetch - copy src: #{src} to dest: #{dest}"
      response = fetch_ok_response( src )

      ### check:
      ##   why not always use wb???
      ##   how is it different for text files?
      ##   will convert newlines (from windows to unix) ???

      # check for content type; use 'wb' for images and zip archives
      if response.content_type =~ /image/ || response.content_type =~ /zip/
        logger.debug ' switching to binary'
        mode = 'wb'
      else
        mode = 'w'
      end
      mode = opts[:mode] if opts[:mode]

      File.open( dest, mode ) { |f| f.write( response.body ) }
    end

    # Issues a GET request for src and follows redirects (see REDIRECT_CODES)
    # until a final response is received.
    #
    # Returns the last Net::HTTPResponse: a 200, a 304 (conditional GET), any
    # error status, or a redirect that carries no Location header. Non-200
    # results are returned, not raised.
    #
    # Raises ArgumentError ('HTTP redirect too deep') when more than
    # REDIRECT_LIMIT requests would be needed.
    def get_response( src )
      uri            = URI.parse( src )
      http_class     = proxy_http_class
      redirect_limit = REDIRECT_LIMIT
      response       = nil

      loop do
        raise ArgumentError, 'HTTP redirect too deep' if redirect_limit == 0
        redirect_limit -= 1

        # a fresh connection per hop - a redirect may point to another host/scheme
        http = http_class.new( uri.host, uri.port )

        logger.debug "GET #{uri.request_uri} uri=#{uri}, redirect_limit=#{redirect_limit}"

        request = Net::HTTP::Get.new( uri.request_uri, request_headers_for( uri ) )
        if uri.is_a?( URI::HTTPS )
          http.use_ssl = true
          # SECURITY NOTE(review): certificate verification is switched off, so
          #  the server's identity is NOT checked. Kept as-is to avoid breaking
          #  existing callers; consider OpenSSL::SSL::VERIFY_PEER.
          http.verify_mode = OpenSSL::SSL::VERIFY_NONE
        end

        response = http.request( request )

        if response.code == '200'
          logger.debug "#{response.code} #{response.message}"
          logger.debug " content_type: #{response.content_type}, content_length: #{response.content_length}"
          break  # will return response
        elsif response.code == '304'   # -- Not Modified - for conditional GETs (using etag,last-modified)
          logger.debug "#{response.code} #{response.message}"
          break  # will return response
        elsif REDIRECT_CODES.include?( response.code )
          location = response['location']
          logger.debug "#{response.code} #{response.message} location=#{location}"
          if location.nil?
            # nothing to follow; hand the redirect response back to the caller
            logger.debug "redirect without location header; returning response as is"
            break
          end
          newuri = URI.parse( location )
          if newuri.relative?
            logger.debug "url relative; try to make it absolute"
            newuri = uri + location
          end
          uri = newuri
        else
          puts "*** error - fetch HTTP - #{response.code} #{response.message}"
          break  # will return response
        end
      end

      response
    end # method get_response

    private

    # Runs get_response and raises HttpError unless the status is '200'.
    def fetch_ok_response( src )
      response = get_response( src )
      # NOTE: on error (NOK) raise exception
      if response.code != '200'
        raise HttpError.new( response.code, response.message )
      end
      response
    end

    # Returns the class used to open connections: a proxy-enabled Net::HTTP
    # class when HTTP_PROXY (or http_proxy) is set, plain Net::HTTP otherwise.
    def proxy_http_class
      # try the lower-case variant too (common on *nix systems)
      proxy = ENV['HTTP_PROXY'] || ENV['http_proxy']

      if proxy
        proxy = URI.parse( proxy )
        logger.debug "using net http proxy: proxy.host=#{proxy.host}, proxy.port=#{proxy.port}"
        if proxy.user && proxy.password
          logger.debug " using credentials: proxy.user=#{proxy.user}, proxy.password=****"
        else
          logger.debug " using no credentials"
        end
        Net::HTTP::Proxy( proxy.host, proxy.port, proxy.user, proxy.password )
      else
        logger.debug "using direct net http access; no proxy configured"
        # same result as Net::HTTP::Proxy( nil, nil, nil, nil ), which hands
        # back Net::HTTP itself when no proxy address is given
        Net::HTTP
      end
    end

    # Builds the request headers for uri: always a User-Agent, plus
    # If-None-Match / If-Modified-Since when caching is enabled and the cache
    # holds an entry for uri.to_s.
    def request_headers_for( uri )
      headers = { 'User-Agent' => "fetcher gem v#{VERSION}" }
      return headers unless use_cache?

      ## check for existing cache entry in cache store (lookup by uri)
      ## todo/fix: normalize uri!!!! - how?
      ##  - remove query_string ?? fragment ?? why? why not??
      ## note: using uri.to_s should return full uri e.g. http://example.com/page.html
      cache_entry = cache[ uri.to_s ]
      return headers unless cache_entry

      logger.info "found cache entry for >#{uri.to_s}<"
      if cache_entry['etag']
        logger.info "adding header If-None-Match (etag) >#{cache_entry['etag']}< for conditional GET"
        headers['If-None-Match'] = cache_entry['etag']
      end
      if cache_entry['last-modified']
        logger.info "adding header If-Modified-Since (last-modified) >#{cache_entry['last-modified']}< for conditional GET"
        headers['If-Modified-Since'] = cache_entry['last-modified']
      end
      headers
    end

  end # class Worker
end # module Fetcher
(Source: rubylibs/fetcher/lib/fetcher/worker.rb)