Class RDig::HttpDocument
In: lib/rdig/documents.rb
Parent: Document

Remote Document to be retrieved by HTTP

Methods

fetch   new  

Attributes

etag  [R] 
referring_uri  [R] 
status  [R] 

Public Class methods

Creates a new HTTP document from a hash of arguments:
url: URL of this document; it may be relative to the referring document or host.
referrer: URI of the document this link was retrieved from.

[Source]

     # File lib/rdig/documents.rb, line 106
# Builds an HTTP document from an options hash.
#
# The whole hash is handed unchanged to the parent constructor; the value
# stored under :referrer is additionally kept in @referring_uri (nil when
# the key is absent).
def initialize(args = {})
  super(args)
  @referring_uri = args[:referrer]
end

Public Instance methods

[Source]

     # File lib/rdig/documents.rb, line 111
# Fetches this document over HTTP and extracts its content.
#
# On a 200 response the ETag header is stored in @etag, the body is passed
# to ContentExtractors.process together with its content type, the result
# is stored in @content, and @status is set to :success. Any other outcome
# leaves @etag and @status untouched.
#
# StandardError failures are never propagated to the caller; they are
# reported on stdout when RDig::config.verbose is set. @content is always
# non-nil afterwards (an empty hash when nothing was extracted).
#
# NOTE(review): the status/meta/content_type calls match the open-uri
# response API, so this assumes open-uri is loaded elsewhere in the file.
def fetch
  url = @uri.to_s
  puts "fetching #{url}" if RDig::config.verbose
  open_remote(url) do |doc|
    if doc.status.first.to_i == 200
      @etag = doc.meta['etag']
      @content = ContentExtractors.process(doc.read, doc.content_type)
      @status = :success
    else
      puts "don't know what to do with response: #{doc.status.join(' : ')}"
    end
  end
rescue OpenURI::HTTPError => e
  # open-uri raises for 4xx/5xx responses instead of yielding them, so a
  # 404 can only be recognised here, never inside the block above.
  if RDig::config.verbose
    if e.io.status.first.to_i == 404
      puts "got 404 for #{@uri}"
    else
      puts "error fetching #{@uri.to_s}: #{e}"
    end
  end
rescue StandardError => e
  puts "error fetching #{@uri.to_s}: #{e}" if RDig::config.verbose
ensure
  @content ||= {}
end

# Opens +url+ and yields the response object to the given block.
#
# Kernel#open stopped handling URLs in Ruby 3.0, while URI.open is only
# provided by open-uri from Ruby 2.5 on; use URI.open where it exists and
# fall back to Kernel#open on older interpreters.
def open_remote(url, &block)
  if URI.respond_to?(:open)
    URI.open(url, &block)
  else
    open(url, &block)
  end
end
private :open_remote

[Validate]