Class: ODDB::WhoPlugin

Inherits:
Plugin show all
Defined in:
src/plugin/who.rb

Defined Under Namespace

Classes: CodeHandler

Constant Summary

# Unit spellings that import_ddds swaps for the replacement value before
# building a Drugs::Dose; units not listed here are passed through as-is.
UNIT_REPLACEMENTS =
{
  'TSD E' => 'TsdI.E.',
  'MIO E' => 'MioI.E.',
}
# Captures the value of the "code" query parameter (upper-case letters and
# digits) from a link's href; used by import_code.
@@query_re =
/code=([A-Z0-9]+)/
# Words that capitalize_all downcases instead of capitalizing
# (matched case-insensitively against a whole word).
@@lower =
/^(and|for|in(cl)?|on|plain|with)$/i

Constants inherited from Plugin

ARCHIVE_PATH, RECIPIENTS

Instance Attribute Summary (collapse)

Instance Method Summary (collapse)

Methods inherited from Plugin

#l10n_sessions, #log_info, #recipients, #resolve_link, #update_rss_feeds

Methods included from HttpFile

#http_body, #http_file

Constructor Details

- (WhoPlugin) initialize(*args)

A new instance of WhoPlugin



37
38
39
40
41
42
43
44
45
# File 'src/plugin/who.rb', line 37

# Builds a new WhoPlugin. All arguments are forwarded untouched to the
# superclass constructor. Afterwards the index URL, the queue of codes
# still to visit and the four counters printed by #report are set up.
def initialize(*args)
  super
  @url   = 'http://www.whocc.no/atc_ddd_index/'
  @codes = CodeHandler.new
  # statistics, all starting at zero
  @count = @created = 0
  @ddd_guidelines = @guidelines = 0
end

Instance Attribute Details

- (Object) codes (readonly)

Returns the value of attribute codes



12
13
14
# File 'src/plugin/who.rb', line 12

# Read-only accessor for the object stored in @codes (a CodeHandler created
# in #initialize); #import shifts codes off it and #import_code pushes
# newly discovered codes onto it.
def codes
  @codes
end

Instance Method Details

- (Object) capitalize_all(str)



46
47
48
49
50
# File 'src/plugin/who.rb', line 46

# Returns a copy of +str+ in which every fragment between word boundaries
# is capitalized, except fragments matching @@lower, which are downcased.
def capitalize_all(str)
  ## split/map/join was benchmarked as faster than appending with <<
  fragments = str.split(/\b/)
  fragments.map { |fragment|
    if @@lower.match(fragment)
      fragment.downcase
    else
      fragment.capitalize
    end
  }.join
end

- (Object) extract_text(node)



51
52
53
54
55
56
57
58
59
# File 'src/plugin/who.rb', line 51

# Returns the inner HTML of +node+ as plain text: runs of whitespace are
# collapsed to a single space, every <br> (with surrounding whitespace)
# becomes a newline and the result is stripped.
#
# Returns nil when +node+ has any element child other than <br>.
def extract_text(node)
  return nil if node.children.any? { |child| child.element? && child.name != 'br' }
  html = node.inner_html
  if RUBY_VERSION < '1.9'
    # Only on Ruby < 1.9: drop raw 0xA0 bytes. The /n flag makes this a
    # byte-oriented pattern; without it the literal is rejected at parse
    # time ("invalid multibyte escape") when the source encoding is UTF-8.
    html.gsub!(/\240/n, '')
  end
  html.gsub(/\s+/, ' ').gsub(/\s*<br\s*\/?>\s*/, "\n").strip
end

- (Object) import(agent = Mechanize.new)



60
61
62
63
64
65
66
# File 'src/plugin/who.rb', line 60

# Drains the code queue: each code shifted off @codes is counted and handed
# to #import_code together with +agent+. Stops at the first falsy value
# returned by @codes.shift and returns the text produced by #report.
def import(agent = Mechanize.new)
  code = @codes.shift
  while code
    @count += 1
    import_code(agent, code)
    # import_code may have pushed further codes onto the queue
    code = @codes.shift
  end
  report
end

- (Object) import_atc(code, link)



67
68
69
70
71
72
73
74
75
76
# File 'src/plugin/who.rb', line 67

# Stores the capitalized text of +link+ as the :en value for the ATC class
# +code+ and returns whatever @app.update returns. When @app does not know
# the code yet, @created is incremented and a fresh pointer is built.
def import_atc(code, link)
  name = capitalize_all(link.inner_text.to_s)
  existing = @app.atc_class(code)
  if existing
    pointer = existing.pointer
  else
    @created += 1
    # NOTE(review): this branch already calls #creator and the update below
    # calls it again -- confirm the nested creator is intended.
    pointer = Persistence::Pointer.new([:atc_class, code]).creator
  end
  @app.update(pointer.creator, :en => name)
end

- (Object) import_code(agent, get_code)



77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# File 'src/plugin/who.rb', line 77

# Fetches the index page for +get_code+ (with descriptions switched on) and
# walks two groups of links whose href carries a "code" parameter:
#
# * links matching //b/a: the link for +get_code+ itself gets its name and
#   guidelines imported; every matched code is pushed onto @codes.
# * links matching //ul//a: the name is imported and #import_ddds is run on
#   the link's grandparent node.
def import_code(agent, get_code)
  query = "?code=%s&showdescription=yes" % get_code
  page = agent.get(@url + query)

  (page/"//b/a").each do |link|
    match = @@query_re.match(link.attributes['href'])
    next unless match
    code = match[1]
    if code == get_code
      atc = import_atc(code, link)
      import_guidelines(atc, link)
    end
    @codes.push(code)
  end

  (page/"//ul//a").each do |link|
    match = @@query_re.match(link.attributes['href'])
    next unless match
    atc = import_atc(match[1], link)
    import_ddds(atc, link.parent.parent)
  end
end

- (Object) import_ddd_guidelines(atc, table)



117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
# File 'src/plugin/who.rb', line 117

# Collects the non-empty text of every <td> in +table+ as paragraphs of a
# single-section Text::Chapter and stores it as the :en value of the
# ddd_guidelines of +atc+ when it differs from what +atc+ already has.
#
# +table+ may be nil; the chapter is then nil as well (as it is when no cell
# yields text).
#
# Returns true when an update was issued (and @ddd_guidelines incremented),
# false otherwise.
def import_ddd_guidelines(atc, table)
  chp = sec = nil
  if table
    (table/'td').each do |td|
      txt = extract_text(td)
      next unless txt && !txt.empty?
      # chapter and section are created lazily, on the first non-empty cell
      chp ||= Text::Chapter.new
      sec ||= chp.next_section
      sec.next_paragraph << txt
    end
  end
  current = atc.ddd_guidelines
  return false if current && current.en == chp
  @ddd_guidelines += 1
  pointer = atc.pointer + :ddd_guidelines
  @app.update pointer.creator, :en => chp
  true
end

- (Object) import_ddds(atc, row)



97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# File 'src/plugin/who.rb', line 97

# Walks +row+ and its following siblings and stores one DDD per row that
# carries a dose. The cell texts are read positionally as code, (ignored),
# dose, unit, administration and comment.
#
# Processing stops at the first row whose code cell is non-empty and differs
# from atc.code. Rows with fewer than six cells are treated as if the
# missing cells were empty instead of raising on nil.
#
# Always returns nil.
def import_ddds(atc, row)
  while row
    cells = row.children.collect { |td| extract_text(td).to_s }
    # values_at yields nil for absent columns; to_s turns those into ''
    code, dose, unit, adm, comment =
      cells.values_at(0, 2, 3, 4, 5).collect { |cell| cell.to_s }
    comment = nil if comment.empty?
    return unless code.empty? || code == atc.code
    unless dose.empty?
      # the DDD key is the administration ('*' when blank) plus the comment
      key = "%s%s" % [adm.empty? ? '*' : adm, comment]
      ddd = atc.ddd(key)
      pointer = ddd ? ddd.pointer : atc.pointer + [:ddd, key]
      unit = UNIT_REPLACEMENTS.fetch(unit, unit)
      @app.update pointer.creator, :note => comment,
                                   :dose => Drugs::Dose.new(dose, unit)
    end
    row = row.next_sibling
  end
end

- (Object) import_guidelines(atc, link)



136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# File 'src/plugin/who.rb', line 136

# Imports the guideline text that accompanies +link+ for +atc+.
#
# Starting at the parent of +link+, following siblings are scanned for the
# first <p>; when none is found the method returns nil without doing
# anything. The text of that <p> (nil when empty or not extractable) is
# wrapped in a Text::Chapter and stored as the :en value of the guidelines
# of +atc+ if it differs from the current one. A <table bgcolor="#cccccc">
# directly after the <p> is handed to #import_ddd_guidelines.
#
# Returns true when either update was issued, false otherwise.
def import_guidelines(atc, link)
  node = link.parent
  until node.name == 'p'
    node = node.next_sibling
    return unless node
  end
  ## nokogiri fixes the faulty html of whocc.no, and moves the table element
  #  out of the p-container.
  table = node.next_sibling
  modified = false
  # the <p> may be the last sibling, in which case there is no table at all
  if table && table.name == 'table' && table[:bgcolor] == '#cccccc'
    modified = true if import_ddd_guidelines(atc, table)
  end
  chp = nil
  txt = extract_text(node)
  if txt && !txt.empty?
    chp = Text::Chapter.new
    chp.next_section.next_paragraph << txt
  end
  current = atc.guidelines
  unless current && current.en == chp
    @guidelines += 1
    pointer = atc.pointer + :guidelines
    @app.update pointer.creator, :en => chp
    modified = true
  end
  modified
end

- (Object) report



163
164
165
166
167
168
169
170
# File 'src/plugin/who.rb', line 163

# Returns the four-line import summary: one line each for @count, @created,
# @guidelines and @ddd_guidelines, every number right-aligned to width 3.
def report
  lines = [
    ["Imported %3i ATC-Codes",            @count],
    ["Updated  %3i English descriptions", @created],
    ["Updated  %3i Guidelines",           @guidelines],
    ["Updated  %3i DDD-Guidelines",       @ddd_guidelines],
  ]
  lines.map { |template, value| format(template, value) }.join("\n")
end