Class: ODDB::TextInfoPlugin

Inherits:
Plugin show all
Defined in:
src/plugin/text_info.rb,
test/test_plugin/text_info.rb

Constant Summary

Constants inherited from Plugin

ARCHIVE_PATH, RECIPIENTS

Instance Attribute Summary (collapse)

Instance Method Summary (collapse)

Methods inherited from Plugin

#l10n_sessions, #log_info, #recipients, #resolve_link, #update_rss_feeds

Methods included from HttpFile

#http_body, #http_file

Constructor Details

- (TextInfoPlugin) initialize(app, opts = {})

A new instance of TextInfoPlugin



12
# File 'src/plugin/text_info.rb', line 12

def initialize app, opts={}

Instance Attribute Details

- (Object) current_eventtarget

Returns the value of attribute current_eventtarget



18
19
20
# File 'test/test_plugin/text_info.rb', line 18

# Returns @current_eventtarget, the event target of the company link that
# import_companies followed most recently; rebuild_resultlist re-submits it.
def current_eventtarget
  @current_eventtarget
end

- (Object) current_search

Returns the value of attribute current_search



18
19
20
# File 'test/test_plugin/text_info.rb', line 18

# Returns @current_search, the [search_method, term] pair recorded by the
# import_company / import_fulltext / import_name loops and replayed by
# rebuild_resultlist.
def current_search
  @current_search
end

- (Object) iksless

Returns the value of attribute iksless



18
19
20
# File 'test/test_plugin/text_info.rb', line 18

# Returns @iksless; report lists its entries under
# "Fachinfos without iksnrs".
def iksless
  @iksless
end

- (Object) parser

Returns the value of attribute parser



18
19
20
# File 'test/test_plugin/text_info.rb', line 18

# Returns @parser, the object parse_fachinfo and parse_patinfo delegate to.
def parser
  @parser
end

- (Object) session_failures

Returns the value of attribute session_failures



18
19
20
# File 'test/test_plugin/text_info.rb', line 18

# Returns @session_failures, the counter download_info increments each time
# detect_session_failure flags a downloaded page.
def session_failures
  @session_failures
end

- (Object) updated_fis (readonly)

Returns the value of attribute updated_fis



11
12
13
# File 'src/plugin/text_info.rb', line 11

# Returns @updated_fis, the counter store_fachinfo increments for every
# Fachinfo it stores.
def updated_fis
  @updated_fis
end

- (Object) updated_pis (readonly)

Returns the value of attribute updated_pis



11
12
13
# File 'src/plugin/text_info.rb', line 11

# Returns @updated_pis, the counter store_patinfo increments for every
# Patinfo it stores.
def updated_pis
  @updated_pis
end

Instance Method Details

- (Object) detect_session_failure(page)



50
51
52
# File 'src/plugin/text_info.rb', line 50

# Reports whether +page+ contains a form named 'frmSearchForm'.
# download_info counts such a page as a session failure and rebuilds the
# result list before retrying.
#
# page - object responding to #form_with(:name => ...).
#
# Returns true if the form is present, false otherwise.
def detect_session_failure(page)
  search_form = page.form_with(:name => 'frmSearchForm')
  search_form.nil? ? false : true
end

- (Object) download_info(type, name, agent, form, eventtarget)



53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# File 'src/plugin/text_info.rb', line 53

# Downloads the German and French version of one document and stores both
# via save_info.
#
# type        - passed through to save_info (import_product uses :fachinfo
#               and :patinfo).
# name        - product name; recorded in @download_errors on HTTP errors.
# agent       - HTTP agent used for the French request.
# form        - result form submitted with +eventtarget+.
# eventtarget - postback target identifying the document.
#
# If a fetched page is flagged by detect_session_failure, the result list is
# rebuilt and the request repeated once. German pages whose body mentions
# "Pseudo-Fachinformation" or "Produktinformation" are counted in
# @ignored_pseudos and flagged with :pseudo.
#
# Returns [paths, flags]: paths maps :de/:fr to whatever save_info returned,
# flags is the (possibly empty) flag hash. After a
# Mechanize::ResponseCodeError the partially filled pair is returned.
def download_info(type, name, agent, form, eventtarget)
  paths = {}
  flags = {}
  german = submit_event(agent, form, eventtarget)
  if detect_session_failure(german)
    @session_failures += 1
    form = rebuild_resultlist(agent)
    german = submit_event(agent, form, eventtarget)
  end
  if /(Pseudo-Fach|Produkt)information/i.match(german.body)
    @ignored_pseudos += 1
    flags[:pseudo] = true
  end
  paths[:de] = save_info(type, name, :de, german, flags)
  # the French text lives at the same URI with the language switched
  french = agent.get(german.uri.to_s.gsub('lang=de', 'lang=fr'))
  if detect_session_failure(french)
    @session_failures += 1
    form = rebuild_resultlist(agent)
    german = submit_event(agent, form, eventtarget)
    french = agent.get(german.uri.to_s.gsub('lang=de', 'lang=fr'))
  end
  paths[:fr] = save_info(type, name, :fr, french, flags)
  [paths, flags]
rescue Mechanize::ResponseCodeError
  @download_errors.push(name)
  [paths, flags]
end

- (Object) eventtarget(string)



81
82
83
84
85
# File 'src/plugin/text_info.rb', line 81

# Extracts the first quoted argument of a doPostBack('...', ...) call.
#
# string - anything; it is converted with #to_s first.
#
# Returns the captured target String, or nil if there is no such call.
def eventtarget(string)
  string.to_s[/doPostBack\('([^']+)'.*\)/, 1]
end

- (Object) extract_iksnrs(languages)



86
87
88
89
90
91
92
93
94
95
96
97
98
99
# File 'src/plugin/text_info.rb', line 86

# Collects the registration numbers mentioned in the documents of
# +languages+.
#
# languages - Hash whose values respond to #iksnrs.
#
# For every document the apostrophes are removed from iksnrs.to_s, then the
# first comma-separated run of 3-5 digit numbers is taken. Each number is
# zero-padded to five digits; duplicates are dropped.
#
# Returns an Array of Strings, or [] if anything raises along the way.
def extract_iksnrs(languages)
  per_document = languages.values.collect do |doc|
    cleaned = doc.iksnrs.to_s.delete("'")
    found = cleaned[/[0-9]{3,5}(?:\s*,\s*[0-9]{3,5})*/u]
    found ? found.split(/\s*,\s*/u) : []
  end
  per_document.flatten.collect { |number| sprintf("%05i", number.to_i) }.uniq
rescue
  []
end

- (Object) fachinfo_news(agent = init_agent)



100
101
102
103
104
105
106
107
108
109
110
111
# File 'src/plugin/text_info.rb', line 100

# Fetches the news page configured in ODDB.config.text_info_newssource and
# returns the names listed in its first 'div#blockContentInner > p'
# paragraph, one name per <br>-separated entry.
#
# agent - HTTP agent responding to #get (default: a fresh init_agent).
#
# Returns the names as a sorted Array of Strings.
# Raises RuntimeError if no news source is configured.
def fachinfo_news agent=init_agent
  url = ODDB.config.text_info_newssource \
    or raise 'please configure ODDB.config.text_info_newssource to proceed'
  page = agent.get url
  list = page.at('div[id="blockContentInner"]/p')
  names = list.to_html.split(/<br\s*\/?>/i).collect do |element|
    # Strip only the enclosing paragraph tags. String#delete would remove
    # every '<', 'p', '>' and '/' character and corrupt names such as
    # "Ibuprofen".
    element.gsub(/<\/?p\b[^>]*>/i, '').strip
  end
  names.sort
end

- (Object) identify_eventtargets(page, ptrn)



112
113
114
115
116
117
118
# File 'src/plugin/text_info.rb', line 112

# Maps the text of every link on +page+ whose href matches +ptrn+ to the
# postback target extracted from that href (see eventtarget).
#
# page - object responding to #links_with(:href => ...).
# ptrn - pattern handed to links_with.
#
# Returns a Hash of link text => event target (nil when the href carries
# no doPostBack call).
def identify_eventtargets(page, ptrn)
  page.links_with(:href => ptrn).inject({}) do |targets, link|
    targets[link.text] = eventtarget(link.href)
    targets
  end
end

- (Object) import_companies(page, agent)



129
130
131
132
133
134
135
136
137
138
139
# File 'src/plugin/text_info.rb', line 129

# Follows every company link (href matching /Linkbutton1/) on a company
# search result +page+ and imports the products listed behind it.
#
# For each link that carries a postback target, the company name is
# appended to @companies, the target is remembered in @current_eventtarget
# and the resulting product page is handed to import_products. Links
# without a target are skipped.
def import_companies(page, agent)
  form = page.form_with(:name => 'frmResulthForm')
  page.links_with(:href => /Linkbutton1/).each do |link|
    target = eventtarget(link.href)
    next unless target
    @companies.push(link.text)
    @current_eventtarget = target
    import_products(submit_event(agent, form, target), agent)
  end
end

- (Object) import_company(names, agent = init_agent)



119
120
121
122
123
124
125
126
127
128
# File 'src/plugin/text_info.rb', line 119

# Searches for each company in +names+ and imports everything listed for
# the companies found.
#
# names - a single company name, an Array of names, or nil.
# agent - HTTP agent (default: a fresh init_agent).
#
# Records the joined names in @search_term and, per name, the search in
# @current_search so rebuild_resultlist can replay it.
def import_company names, agent=init_agent
  # Array() instead of #to_a: String#to_a no longer exists since Ruby 1.9,
  # so a single name would raise NoMethodError.
  companies = Array(names)
  @search_term = companies.join ', '
  companies.each do |name|
    @current_search = [:search_company, name]
    # search for company
    page = search_company name, agent
    # import each company from the result
    import_companies page, agent
  end
end

- (Object) import_fulltext(terms, agent = init_agent)



140
141
142
143
144
145
146
147
# File 'src/plugin/text_info.rb', line 140

# Runs a full-text search for each entry of +terms+ and imports the
# products on every result page.
#
# terms - a single search term, an Array of terms, or nil.
# agent - HTTP agent (default: a fresh init_agent).
#
# Records the joined terms in @search_term and, per term, the search in
# @current_search so rebuild_resultlist can replay it.
def import_fulltext terms, agent=init_agent
  # Array() instead of #to_a: String#to_a no longer exists since Ruby 1.9,
  # so a single term would raise NoMethodError.
  queries = Array(terms)
  @search_term = queries.join ', '
  queries.each do |term|
    @current_search = [:search_fulltext, term]
    page = search_fulltext term, agent
    import_products page, agent
  end
end

- (Object) import_name(terms, agent = init_agent)



148
149
150
151
152
153
154
155
# File 'src/plugin/text_info.rb', line 148

# Runs a product-name search for each entry of +terms+ and imports the
# products on every result page.
#
# terms - a single product name, an Array of names, or nil.
# agent - HTTP agent (default: a fresh init_agent).
#
# Records the joined terms in @search_term and, per term, the search in
# @current_search so rebuild_resultlist can replay it.
def import_name terms, agent=init_agent
  # Array() instead of #to_a: String#to_a no longer exists since Ruby 1.9,
  # so a single name would raise NoMethodError.
  product_names = Array(terms)
  @search_term = product_names.join ', '
  product_names.each do |term|
    @current_search = [:search_product, term]
    page = search_product term, agent
    import_products page, agent
  end
end

- (Object) import_news(agent = init_agent)



156
157
158
159
160
161
162
163
164
165
# File 'src/plugin/text_info.rb', line 156

# Imports the products that appear on the news page but not in the news
# log written by the previous run.
#
# agent - HTTP agent (default: a fresh init_agent).
#
# The current news list is written to the log on every call. import_name
# and postprocess only run when there is at least one new name.
#
# Returns true if new names were found and imported, false otherwise.
def import_news agent=init_agent
  old_news = old_fachinfo_news
  news = fachinfo_news(agent)
  update_name_list = true_news(news, old_news)
  # An Array is always truthy, so test for content explicitly. Otherwise
  # the import and the RSS regeneration run even when nothing changed.
  has_news = !update_name_list.empty?
  import_name(update_name_list, agent) if has_news
  log_news news
  postprocess if has_news
  has_news
end

- (Object) import_product(name, agent, form, fi_target, pi_target)



174
175
176
177
178
179
180
# File 'src/plugin/text_info.rb', line 174

# Downloads the Fachinfo of one product and, if +pi_target+ is given, its
# Patinfo as well, then passes everything to update_product.
#
# name      - product name.
# agent     - HTTP agent.
# form      - result form used for the postbacks.
# fi_target - postback target of the Fachinfo.
# pi_target - postback target of the Patinfo, or nil if there is none.
#
# Missing Patinfo paths/flags are passed on as empty Hashes.
# Returns whatever update_product returns.
def import_product(name, agent, form, fi_target, pi_target)
  fi_paths, fi_flags = download_info(:fachinfo, name, agent, form, fi_target)
  pi_paths, pi_flags =
    pi_target ? download_info(:patinfo, name, agent, form, pi_target) : [nil, nil]
  update_product(name, fi_paths, pi_paths || {}, fi_flags, pi_flags || {})
end

- (Object) import_products(page, agent)



166
167
168
169
170
171
172
173
# File 'src/plugin/text_info.rb', line 166

# Imports every product that has a Fachinfo link on the result +page+.
#
# Fachinfo and Patinfo links are matched by product name (the link text).
# Products are processed in sorted order, each with its Fachinfo target and
# the Patinfo target of the same name (nil if there is none).
def import_products(page, agent)
  fachinfo_targets = identify_eventtargets(page, /dtgFachinfo/)
  patinfo_targets = identify_eventtargets(page, /dtgPatienteninfo/)
  form = page.form_with(:name => /frmResult(Produkte|hForm)/)
  fachinfo_targets.sort.each do |name, fi_target|
    import_product(name, agent, form, fi_target, patinfo_targets[name])
  end
end

- (Object) init_agent



33
34
35
36
37
# File 'src/plugin/text_info.rb', line 33

# Builds the Mechanize agent used for all requests, with its user agent
# string set to that of a Safari 3.1.2 browser.
#
# Returns the configured agent.
def init_agent
  Mechanize.new.tap do |browser|
    browser.user_agent = "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_4_11; de-de) AppleWebKit/525.18 (KHTML, like Gecko) Version/3.1.2 Safari/525.22"
  end
end

- (Object) init_searchform(agent)



38
39
40
41
42
43
44
45
46
47
48
49
# File 'src/plugin/text_info.rb', line 38

# Opens the search page configured in ODDB.config.text_info_searchform.
#
# If the page contains the form 'frmNutzungsbedingungen' with a button
# named 'btnAkzeptieren', that form is submitted with the button and the
# resulting page is returned instead.
#
# agent - HTTP agent responding to #get and #submit.
#
# Returns the page to search on.
# Raises RuntimeError if no search form URL is configured.
def init_searchform(agent)
  url = ODDB.config.text_info_searchform
  raise 'please configure ODDB.config.text_info_searchform to proceed' unless url
  page = agent.get(url)
  form, = page.form_with(:name => 'frmNutzungsbedingungen')
  accept = form && form.button_with(:name => 'btnAkzeptieren')
  page = agent.submit(form, accept) if accept
  page
end

- (Object) log_news(lines)



181
182
183
184
185
186
# File 'src/plugin/text_info.rb', line 181

# Overwrites the news log (@news_log) with +lines+, one entry per line and
# without a trailing newline. The log's directory is created if missing.
#
# lines - Array of Strings.
def log_news(lines)
  log_dir = File.dirname(@news_log)
  FileUtils.mkdir_p(log_dir)
  File.open(@news_log, 'w') { |log| log.print(lines.join("\n")) }
end

- (Object) old_fachinfo_news



187
188
189
190
191
192
193
194
195
# File 'src/plugin/text_info.rb', line 187

# Reads the news log (@news_log) written by log_news.
#
# Returns the logged lines with surrounding whitespace stripped, or [] if
# the log file does not exist.
def old_fachinfo_news
  File.readlines(@news_log).map { |entry| entry.strip }
rescue Errno::ENOENT
  []
end

- (Object) parse_fachinfo(path)



196
197
198
# File 'src/plugin/text_info.rb', line 196

# Hands the Fachinfo HTML file at +path+ to @parser.
#
# Returns whatever @parser.parse_fachinfo_html returns.
def parse_fachinfo(path)
  @parser.parse_fachinfo_html(path)
end

- (Object) parse_patinfo(path)



199
200
201
# File 'src/plugin/text_info.rb', line 199

# Hands the Patinfo HTML file at +path+ to @parser.
#
# Returns whatever @parser.parse_patinfo_html returns.
def parse_patinfo(path)
  @parser.parse_patinfo_html(path)
end

- (Object) postprocess



202
203
204
# File 'src/plugin/text_info.rb', line 202

# Regenerates the 'fachinfo.rss' feed from @app.sorted_fachinfos, rendered
# with View::Rss::Fachinfo.
def postprocess
  fachinfos = @app.sorted_fachinfos
  update_rss_feeds('fachinfo.rss', fachinfos, View::Rss::Fachinfo)
end

- (Object) rebuild_resultlist(agent)



205
206
207
208
209
210
211
212
213
214
# File 'src/plugin/text_info.rb', line 205

# Repeats the search recorded in @current_search and returns the form the
# interrupted download has to be re-submitted with.
#
# Without @current_eventtarget this is the 'frmResulthForm' form of the
# search result. With it, the target is submitted on that form first and
# the 'frmResultProdukte' form of the resulting page is returned.
#
# agent - HTTP agent.
def rebuild_resultlist(agent)
  search_method, term = @current_search
  results = send(search_method, term, agent)
  form = results.form_with(:name => 'frmResulthForm')
  return form unless @current_eventtarget
  products = submit_event(agent, form, @current_eventtarget)
  products.form_with(:name => 'frmResultProdukte')
end

- (Object) replace(text_info, container, type)



215
216
217
218
219
220
221
# File 'src/plugin/text_info.rb', line 215

# Points +container+'s +type+ attribute at +text_info+ and deletes the
# previously referenced text info if it reports itself as empty afterwards.
#
# text_info - the new text info; must respond to #pointer.
# container - object holding the reference; must respond to +type+ and
#             #pointer.
# type      - name of the attribute to replace.
def replace(text_info, container, type)
  previous = container.send(type)
  @app.update(container.pointer, type => text_info.pointer)
  @app.delete(previous.pointer) if previous && previous.empty?
end

- (Object) report



222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
# File 'src/plugin/text_info.rb', line 222

# Builds the plain-text summary of the import run from the plugin's
# counters and lists.
#
# Returns a newline-separated String; sections are separated by blank lines.
def report
  unknown = @unknown_iksnrs.collect { |iksnr, name| "#{name} (#{iksnr})" }
  lines = []
  lines << "Searched for #{@search_term}"
  lines << "Stored #{@updated_fis} Fachinfos"
  lines << "Ignored #{@ignored_pseudos} Pseudo-Fachinfos"
  lines << "Ignored #{@up_to_date_fis} up-to-date Fachinfo-Texts"
  lines << "Stored #{@updated_pis} Patinfos"
  lines << "Ignored #{@up_to_date_pis} up-to-date Patinfo-Texts"
  lines << nil # nil entries become blank separator lines when joined
  lines << "Checked #{@companies.size} companies"
  lines << @companies.join("\n")
  lines << nil
  lines << "Unknown Iks-Numbers: #{@unknown_iksnrs.size}"
  lines << unknown.join("\n")
  lines << nil
  lines << "Fachinfos without iksnrs: #{@iksless.size}"
  lines << @iksless.join("\n")
  lines << nil
  lines << "Session failures: #{@session_failures}"
  lines << nil
  lines << "Download errors: #{@download_errors.size}"
  lines << @download_errors.join("\n")
  lines << nil
  lines << "Parse Errors: #{@failures.size}"
  lines << @failures.join("\n")
  lines.join("\n")
end

- (Object) save_info(type, name, lang, page, flags = {})



247
# File 'src/plugin/text_info.rb', line 247

def save_info type, name, lang, page, flags={}

- (Object) search(type, term, agent)



259
260
261
262
263
264
265
266
267
# File 'src/plugin/text_info.rb', line 259

# Fills in and submits the search form ('frmSearchForm').
#
# type  - value of the radio button selecting the kind of search.
# term  - text entered into the 'txtSearch' field.
# agent - HTTP agent.
#
# Returns the page resulting from the submission.
def search(type, term, agent)
  form = init_searchform(agent).form_with(:name => 'frmSearchForm')
  # 'rbPraeparat' is the default value; clicking it leads to an error
  form.radiobutton_with(:value => type).click if type != 'rbPraeparat'
  form['txtSearch'] = term
  agent.submit(form)
end

- (Object) search_company(name, agent)



268
269
270
# File 'src/plugin/text_info.rb', line 268

# Searches by company name (radio button 'rbFirma').
#
# Returns the result page produced by search.
def search_company(name, agent)
  search('rbFirma', name, agent)
end

- (Object) search_fulltext(term, agent)



271
272
273
# File 'src/plugin/text_info.rb', line 271

# Runs a full-text search (radio button 'rbFulltext').
#
# Returns the result page produced by search.
def search_fulltext(term, agent)
  search('rbFulltext', term, agent)
end

- (Object) search_product(name, agent)



274
275
276
# File 'src/plugin/text_info.rb', line 274

# Searches by product name (radio button 'rbPraeparat').
#
# Returns the result page produced by search.
def search_product(name, agent)
  search('rbPraeparat', name, agent)
end

- (Object) store_fachinfo(languages)



277
278
279
280
281
# File 'src/plugin/text_info.rb', line 277

# Creates a new Fachinfo from +languages+ and counts it in @updated_fis.
#
# languages - the values handed to @app.update for the new record.
#
# Returns whatever @app.update returns.
def store_fachinfo(languages)
  @updated_fis += 1
  creator = Persistence::Pointer.new(:fachinfo).creator
  @app.update(creator, languages)
end

- (Object) store_orphaned(iksnr, fis, pis)



282
283
284
285
286
287
288
289
290
291
292
293
294
295
# File 'src/plugin/text_info.rb', line 282

# Stores text infos that could not be assigned, keyed by +iksnr+: first an
# orphaned Fachinfo holding +fis+, then an orphaned Patinfo holding +pis+.
#
# Returns the result of the second @app.update call.
def store_orphaned(iksnr, fis, pis)
  orphans = [[:orphaned_fachinfo, fis], [:orphaned_patinfo, pis]]
  results = orphans.collect do |kind, languages|
    pointer = Persistence::Pointer.new(kind)
    @app.update(pointer.creator, {:key => iksnr, :languages => languages})
  end
  results.last
end

- (Object) store_patinfo(reg, languages)



296
297
298
299
300
301
302
303
304
305
# File 'src/plugin/text_info.rb', line 296

# Stores +languages+ as the Patinfo of registration +reg+ and counts it in
# @updated_pis.
#
# If any sequence of +reg+ already has a Patinfo, the first one found is
# updated; otherwise a new Patinfo is created.
#
# Returns whatever @app.update returns.
def store_patinfo(reg, languages)
  @updated_pis += 1
  existing = reg.sequences.collect { |_seqnr, seq| seq.patinfo }.compact.first
  fresh = Persistence::Pointer.new(:patinfo).creator
  target = existing ? existing.pointer : fresh
  @app.update(target, languages)
end

- (Object) submit_event(agent, form, eventtarget, *args)



306
307
308
309
310
311
312
313
314
315
316
317
318
319
# File 'src/plugin/text_info.rb', line 306

# Sets the form's '__EVENTTARGET' field to +eventtarget+ and submits the
# form through +agent+, forwarding any extra +args+ to agent.submit.
#
# On Mechanize::ResponseCodeError the method body is re-run via `retry`, at
# most ODDB.config.text_info_max_retry times, sleeping 1, 2, 3, ... seconds
# before successive attempts. Once the retries are used up the error is
# re-raised to the caller.
#
# Returns the page returned by agent.submit.
def submit_event agent, form, eventtarget, *args
  max_retries = ODDB.config.text_info_max_retry
  form['__EVENTTARGET'] = eventtarget
  agent.submit form, *args
rescue Mechanize::ResponseCodeError => err
  # `retries` is nil on the first failure and keeps its value across
  # `retry`, so it is initialised once and then counts down.
  retries ||= max_retries
  if retries > 0
    retries -= 1
    # wait one second longer after every failed attempt
    sleep max_retries - retries
    retry
  else
    raise
  end
end

- (Object) true_news(news, old_news)



320
321
322
# File 'src/plugin/text_info.rb', line 320

# Determines which news entries are actually new.
#
# news     - Array of names currently on the news page.
# old_news - Array of names logged by the previous run.
#
# Returns the entries of +news+ that do not occur in +old_news+.
def true_news(news, old_news)
  news - old_news
end

- (Object) update_product(name, fi_paths, pi_paths, fi_flags = {}, pi_flags = {})



323
# File 'src/plugin/text_info.rb', line 323

def update_product name, fi_paths, pi_paths, fi_flags={}, pi_flags={}