Class: ODDB::Interaction::FlockhartPlugin

Inherits:
Plugin show all
Defined in:
src/plugin/flockhart.rb,
test/test_plugin/flockhart.rb

Constant Summary

HTTP_SERVER =
'medicine.iupui.edu'
HTML_PATH =
'/clinpharm/DDIs'
TARGET =
File.expand_path('../../test/data/html/interaction/flockhart', File.dirname(__FILE__))
TABLE =
"table.asp"
LINKS =
[ "1A2references.asp", "2B6references.asp", "2C8references.asp",
"2C9references.asp", "2C19references.asp", "2D6references.asp",
"2E1references.asp", "3A457references.asp" ]
FORMAT_CYT_ID =
{
  "3A457"   => ["3A4", "3A5-7"],
  "3A4,5,7" =>  ["3A4", "3A5-7"],
  "3A,4,5,7"=>  ["3A4", "3A5-7"],
}
[ "clinlist.htm" ]
RETRIES =
3
RETRY_WAIT =
5
IMAGES =
["substrates", "inhibitors", "inducers"]

Constants inherited from Plugin

ARCHIVE_PATH, RECIPIENTS

Instance Attribute Summary (collapse)

Instance Method Summary (collapse)

Methods inherited from Plugin

#l10n_sessions, #log_info, #recipients, #resolve_link, #update_rss_feeds

Methods included from HttpFile

#http_body, #http_file

Constructor Details

- (FlockhartPlugin) initialize(app, refetch_pages)

A new instance of FlockhartPlugin



277
278
279
280
281
# File 'src/plugin/flockhart.rb', line 277

# Stores the collaborators and starts with an empty parsing-error table.
#
# @param app [Object] application handle, kept in @app
# @param refetch_pages [Object] when truthy, #parse_table calls #fetch_page
#   for the table page before reading the local copy
def initialize(app, refetch_pages)
  @app, @refetch_pages = app, refetch_pages
  @parsing_errors = {}
end

Instance Attribute Details

- (Object) parsing_errors (readonly)

Returns the value of attribute parsing_errors



14
15
16
# File 'test/test_plugin/flockhart.rb', line 14

# Reader for the parsing errors recorded so far (defined in the test file).
#
# @return [Object] the current value of @parsing_errors
def parsing_errors
  instance_variable_get(:@parsing_errors)
end

Instance Method Details

- (Object) fetch_page(page_name)



282
283
284
285
286
287
# File 'src/plugin/flockhart.rb', line 282

# Hands one page of the remote site to #http_file, pairing the remote path
# below HTML_PATH with the local path below TARGET.
#
# @param page_name [String] file name of the page (e.g. the value of TABLE)
# @return [Object] whatever #http_file returns
def fetch_page(page_name)
  remote_path = "#{HTML_PATH}/#{page_name}"
  local_path = "#{TARGET}/#{page_name}"
  http_file(HTTP_SERVER, remote_path, local_path)
end


352
353
354
355
356
357
358
359
360
361
362
363
364
# File 'src/plugin/flockhart.rb', line 352

# Extracts the detail-page links from the locally stored table page.
#
# Reads TARGET/TABLE, feeds it through a Parser/Formatter pair that writes
# into a TableLinksWriter, and returns the links the writer collected. When
# the number of links differs from LINKS.size, an entry is recorded under
# "flockhart" in @parsing_errors.
#
# @return [Object] the value of TableLinksWriter#links after extraction
def get_table_links
  link_writer = TableLinksWriter.new
  html_parser = Parser.new(Formatter.new(link_writer))
  html_parser.feed(File.read("#{TARGET}/#{TABLE}"))
  link_writer.extract_data
  unless link_writer.links.size == LINKS.size
    @parsing_errors['flockhart'] = 'different amount of links found in table.asp'
  end
  link_writer.links
end

- (Object) parse_detail_page(cyt_name, page)



288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
# File 'src/plugin/flockhart.rb', line 288

# Builds a Cytochrome from the parsed detail ("references") page of one
# cytochrome.
#
# Walks the direct children of every <td> below the first
# div.content_content_inner element and reacts to them in document order:
# * <h2> whose text contains SUBSTRATE, INHIBITOR or INDUCER selects the
#   connection type for everything that follows
# * <b> opens a new Interaction::<Type>Connection (named after the
#   capitalized tag text, language 'en') and adds it to the cytochrome;
#   ignored while no connection type has been selected
# * text nodes are stripped and appended to a buffer
# * <a> turns the buffer into an Interaction::AbstractLink that is added to
#   the current connection
# * <br> appends a newline to a non-empty buffer, or starts a fresh buffer
#   when the buffer already ends with a newline
#
# NOTE(review): still raises NoMethodError when the content div is missing
# or when an <a> appears before any <b> opened a connection — confirm
# whether those cases should be recorded in @parsing_errors instead.
#
# @param cyt_name [String] name handed to Cytochrome.new
# @param page [Object] parsed page answering to `/` with a selector
# @return [Cytochrome] the cytochrome with its connections and links
def parse_detail_page(cyt_name, page)
  div = (page/"div[@class=content_content_inner]").first
  cytochrome = Cytochrome.new cyt_name
  buffer = ''
  connection = nil
  current_table = nil
  (div/'td').each do |td|
    td.children.each do |child|
      if child.is_a?(Nokogiri::XML::Text)
        buffer << child.to_s.strip
      else
        case child.name
        when 'a'
          abstract_link = Interaction::AbstractLink.new
          # Split the buffered text into the link text and an optional
          # trailing "[" that belongs to the link info.
          match = /^(.*?)\s*(\[)?$/.match buffer
          abstract_link.text = match[1]
          # nil.to_s returns a frozen String on Ruby 2.7+, so take a mutable
          # copy before appending to it.
          buffer = match[2].to_s.dup
          buffer << child.inner_text
          abstract_link.href = child.attributes["href"].to_s
          # info and buffer are the same String object, so text appended
          # after the link (e.g. a closing "]") also shows up in info —
          # presumably intended; confirm before changing.
          abstract_link.info = buffer
          connection.add_link abstract_link
        when 'b'
          if current_table
            name = "#{current_table.capitalize}Connection"
            klass = Interaction.const_get name
            connection = klass.new child.inner_text.capitalize, 'en'
            cytochrome.add_connection connection
          end
        when 'br'
          if "\n" == buffer[-1,1]
            # second consecutive line break: start over with a new buffer
            buffer = ''
          elsif !buffer.empty?
            buffer << "\n"
          end
        when 'h2'
          if match = /SUBSTRATE|INHIBITOR|INDUCER/u.match(child.inner_text)
            current_table = match.to_s.downcase
          end
        end
      end
    end
  end
  cytochrome
end

- (Object) parse_detail_pages



333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
# File 'src/plugin/flockhart.rb', line 333

# Fetches and parses the detail page behind every link from
# #get_table_links.
#
# The cytochrome name is the part of the link in front of "references".
# Names listed in FORMAT_CYT_ID are stored once under each of their mapped
# names; all other names are stored as they are.
#
# @return [Hash] cytochrome name => result of #parse_detail_page
def parse_detail_pages
  agent = Mechanize.new
  get_table_links.each_with_object({}) do |link, cytochromes|
    cyt_name = link.split("references").first
    page = agent.get "http://#{HTTP_SERVER}#{HTML_PATH}/#{link}"
    cytochrome = parse_detail_page cyt_name, page
    # combined ids map to several names that all share one cytochrome
    (FORMAT_CYT_ID[cyt_name] || [cyt_name]).each do |name|
      cytochromes.store(name, cytochrome)
    end
  end
end

- (Object) parse_table



365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
# File 'src/plugin/flockhart.rb', line 365

# Parses the locally stored table page with a FlockhartWriter.
#
# When @refetch_pages is truthy, the table page is passed to #fetch_page
# first. The data extracted by the writer is re-keyed by the last
# "/"-separated segment of each original key.
#
# @return [Hash] last key segment => extracted value
def parse_table
  fetch_page(TABLE) if @refetch_pages
  table_writer = FlockhartWriter.new
  html_parser = Parser.new(Formatter.new(table_writer))
  markup = File.read("#{TARGET}/#{TABLE}")
  # Double line breaks are turned into a <category /> tag before parsing —
  # presumably so the writer can tell categories apart; confirm against
  # FlockhartWriter.
  html_parser.feed(markup.gsub('<br /><br />', '<category />'))
  by_last_segment = {}
  table_writer.extract_data.each do |key, value|
    by_last_segment[key.split("/").last] = value
  end
  by_last_segment
end

- (Object) report



382
383
384
385
386
387
388
389
390
391
392
393
394
# File 'src/plugin/flockhart.rb', line 382

# Renders a plain-text summary of the run: the number of updated packages,
# the number of parsing errors, and one sorted "key => message" line per
# parsing error.
#
# @return [String] the summary, lines separated by "\n"
def report
  error_lines = @parsing_errors.map { |error| error.join(" => ") }.sort
  # @updated_packages is not assigned by #initialize; treat an unset value
  # as "nothing updated" instead of failing on nil.size.
  updated_count = (@updated_packages || []).size
  [
    "updated packages: #{updated_count}",
    "parsing errors:   #{@parsing_errors.size}",
  ].concat(error_lines).join("\n")
end