Class: ODDB::FiParse::FachinfoPDFWriter

Inherits:
Writer show all
Includes:
FachinfoWriterMethods, Rpdf2txt::DefaultHandler
Defined in:
ext/fiparse/src/fachinfo_pdf.rb

Constant Summary

IMAGE_DIR =
File.join(IMAGE_DIR, 'fachinfo')
@@skip_pattern =
/documed|copyright|seite|page|[kc]ompendium/iu
@@hr_pattern =
/-{5}\s*$/u

Instance Method Summary (collapse)

Methods included from FachinfoWriterMethods

#to_fachinfo

Methods inherited from Writer

#named_chapter, #named_chapters, #new_alignment, #next_chapter, #send_hor_rule, #send_literal_data, #set_target

Methods inherited from NullWriter

#new_fonthandler, #new_linkhandler, #new_tablehandler, #send_meta

Constructor Details

- (FachinfoPDFWriter) initialize(*args)

A new instance of FachinfoPDFWriter



16
17
18
19
20
# File 'ext/fiparse/src/fachinfo_pdf.rb', line 16

# Builds a new writer. All arguments are handed on unchanged to the
# superclass initializer; afterwards the line-tracking state is reset.
def initialize(*args)
  super
  # nil means: no table-header comparison is in progress
  @tableheader_lineno = nil
  # running total of the sizes of all data chunks received since the
  # last line break
  @chars_since_last_linebreak = 0
end

Instance Method Details

- (Object) add_text



49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# File 'ext/fiparse/src/fachinfo_pdf.rb', line 49

# Flushes the text buffered in +out+ into the document structure and then
# empties the buffer. Where the text ends up depends on the font that was
# active while it was collected (@font):
#
# * bold and italic: the stripped text becomes the heading of a new
#   chapter (ignored when blank)
# * bold only: appended to @name while no chapter exists; otherwise
#   appended in :bold format to the current paragraph, if there is one
# * italic only: a section subheading when a paragraph is just starting
#   or preformatted mode is on; otherwise :italic text inside the current
#   paragraph; a warning is emitted when there is no paragraph at all
# * any other font: body text. It is dropped when no chapter exists, when
#   it is blank, or when it matches @@skip_pattern. Courier fonts produce
#   preformatted paragraphs, Symbol fonts are tagged with :symbol.
#
# Returns immediately, leaving the buffer untouched, while no font is set.
def add_text
  return unless(@font)
  if(@font.bold? && @font.italic?)
    heading = self.out.strip
    unless(heading.empty?)
      @chapter = next_chapter
      @chapter.heading = heading
      set_templates(@chapter)
      @section = @chapter.next_section
    end
  elsif(@font.bold?)
    if @chapter.nil?
      @name << self.out.strip
    elsif(@paragraph)
      @paragraph.set_format(:bold)
      @paragraph << self.out
      @paragraph.reduce_format(:bold)
    end
  elsif(@font.italic?)
    ## special case: italic after company-name is the
    ## galenic_form-chapter of the pre AMZV-form of fi
    if(@chapter == @company)
      @chapter = next_chapter
      @section = @chapter.next_section
    end
    if(@fresh_paragraph || @preformatted)
      unless(self.out.empty?)
        @chapter ||= next_chapter
        @section = @chapter.next_section
        @section.subheading << self.out
        @paragraph = @section.next_paragraph
      end
      @wrote_section_heading = true
    elsif @paragraph
      @paragraph.set_format(:italic)
      @paragraph << self.out
      @paragraph.reduce_format(:italic)
    else
      warn "ignoring \"#{self.out}\""
    end
    if @paragraph && @paragraph.empty?
      @fresh_paragraph = false
    end
  else
    str_check = self.out.strip
    font_name = @font.basefont_name
    courier = !/courier/iu.match(font_name).nil?
    symbol = !/symbol/iu.match(font_name).nil?
    if(!@chapter.nil? && !str_check.empty? \
       && !@@skip_pattern.match(self.out))
      str = self.out
      @wrote_section_heading = false
      ## for the first paragraph after a preformatted paragraph
      if(!(courier || symbol) && @preformatted)
        @fresh_paragraph = true
      end
      if(@fresh_paragraph)
        @paragraph = @section.next_paragraph
      end
      if(symbol)
        @paragraph.set_format(:symbol)
        @symbol_format = true
      elsif(@symbol_format)
        @symbol_format = false
        @paragraph.reduce_format(:symbol)
      end
      if(courier)
        if(@paragraph.empty?)
          ## Remove line breaks in front of the first preformatted line.
          ## This used to call the non-destructive String#gsub and throw
          ## the result away, so nothing was ever removed. Only leading
          ## line breaks are stripped; indentation and blank lines further
          ## down are kept.
          str.sub!(/\A[\n\r]+/u, '')
          @paragraph.preformatted!
        elsif(!@paragraph.preformatted?)
          @paragraph = @section.next_paragraph
          @paragraph.preformatted!
        end
        if(@preceding_hr)
          ## a rule seen before the preformatted text is rendered as a
          ## line of dashes in front of it
          str = "-"*80 << "\n" << str
          @preceding_hr = false
        end
      else
        ## flowing text: join hyphenated line ends, then turn the
        ## remaining line breaks into single spaces
        str.gsub!(/-\n/u, "-")
        str.gsub!(/ ?\n ?/u, " ")
        @preformatted = false
      end
      @paragraph << str
      @fresh_paragraph = false
    end
  end
  @out = ''
end

- (Boolean) detect_tableheader?

Returns:

  • (Boolean)


21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# File 'ext/fiparse/src/fachinfo_pdf.rb', line 21

# Tells whether the buffered line (@out) merely repeats the line of the
# current paragraph found at index @tableheader_lineno -- presumably a
# table header printed again on a new page (confirm against callers).
#
# On a match the index advances to the next paragraph line; on a mismatch
# the comparison is switched off by setting the index to nil. While the
# index is still 0, blank buffered lines count as a match and leave the
# index where it is.
#
# Returns true or false.
def detect_tableheader?
  ## ignore empty lines at the start of the page
  return true if @tableheader_lineno == 0 && @out.strip.empty?

  paragraph_lines = @paragraph.to_s.split("\n")
  expected = @tableheader_lineno ? paragraph_lines.at(@tableheader_lineno) : nil
  repeated = expected ? expected.strip == @out.strip : false
  @tableheader_lineno = repeated ? @tableheader_lineno + 1 : nil
  repeated
end

- (Object) new_font(font)



37
38
39
40
41
42
43
44
45
46
47
48
# File 'ext/fiparse/src/fachinfo_pdf.rb', line 37

# Switches the active font to +font+.
#
# If a font was already active, the text buffered so far is flushed with
# add_text first, so that it is still interpreted with the old font.
# Returns +font+.
def new_font(font)
  if @font
    add_text
    ## Raise the preformatted flag right away when a Courier font is
    ## coming up: line_break and similar handlers need to know about it
    ## before the next call to add_text.
    @preformatted = true if /courier/iu.match(font.basefont_name)
  end
  @font = font
end

- (Object) send_flowing_data(data)



139
140
141
142
# File 'ext/fiparse/src/fachinfo_pdf.rb', line 139

# Appends +data+ to the text buffer, with every "■" character removed.
#
# The full size of +data+ (counted before the removal) is added to the
# number of characters seen since the last line break.
# Returns the buffer.
def send_flowing_data(data)
  @chars_since_last_linebreak += data.size
  without_bullets = data.gsub(/■/u, '')
  self.out << without_bullets
end

- (Object) send_hr



143
144
145
146
147
148
149
150
151
152
153
154
# File 'ext/fiparse/src/fachinfo_pdf.rb', line 143

# Handles a horizontal rule. A line break is processed first in any case.
#
# Inside a preformatted paragraph the rule is written into the buffer as
# 80 dashes plus a newline, unless the buffer or the last six characters
# of the paragraph already match @@hr_pattern. Anywhere else the rule is
# only remembered in @preceding_hr.
def send_hr
  send_line_break
  inside_preformatted = @paragraph && @paragraph.preformatted?
  return @preceding_hr = true unless inside_preformatted

  already_ruled = @@hr_pattern.match(self.out) \
    || @@hr_pattern.match(@paragraph[-6..-1])
  return nil if already_ruled

  self.out << "-"*80 << "\n"
  @preceding_hr = false
end

- (Object) send_image(handle)



155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
# File 'ext/fiparse/src/fachinfo_pdf.rb', line 155

# Stores the image behind +handle+ as a PNG below IMAGE_DIR and adds an
# image entry pointing to it to the current section. A line break is
# processed before and after.
#
# The file name is built from the lower-cased letters a-z of @name; the
# first two of them select the subdirectory. If one of the files already
# present for that prefix compares equal to the image, its path is
# reused; otherwise the next free number is appended to the prefix.
# Nothing is stored when +handle.image+ is nil.
#
# NOTE(review): an ArgumentError raised anywhere in here is swallowed on
# purpose by the original code (the reason is not visible in this file);
# in that case the closing line break is skipped as well.
def send_image(handle)
  send_line_break
  if img = handle.image
    prefix = @name.downcase.gsub(/[^a-z]/u, '')
    directory = File.join(IMAGE_DIR, prefix[0,2])
    FileUtils.mkdir_p directory
    files = Dir.glob("#{directory}/#{prefix}*")
    ## look for an identical image that has been saved before;
    ## unreadable files simply do not count as a match
    save = files.find { |path|
      begin
        other, = Magick::Image.read(path)
        other == img
      rescue
        false
      end
    }
    if save.nil?
      ## Files picked up by the glob that do not follow the
      ## "<digits>.png" naming count as 0. Calling match[1] on them
      ## raised NoMethodError before, which aborted the whole import.
      numbers = files.collect { |path|
        match = /(\d+)\.png/u.match File.basename(path)
        match ? match[1].to_i : 0
      }
      id = numbers.max.to_i.next
      save = File.join directory, "#{prefix}_#{id}.png"
    end
    img.write save
    ## only the part of the path from "/resources/" on is recorded
    @section.next_image.src = save[%r!/resources/.*!u]
  end
  send_line_break
rescue ArgumentError
end

- (Object) send_line_break



195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
# File 'ext/fiparse/src/fachinfo_pdf.rb', line 195

# Handles a line break in the incoming text.
#
# * While the current chapter is the date chapter and the buffer ends
#   with a period, the buffer is flushed and a new chapter, section and
#   paragraph are opened.
# * In preformatted mode a line recognised by detect_tableheader? is
#   discarded; any other line is flushed and a newline is appended to the
#   current paragraph, if there is one.
# * Otherwise a line of fewer than 80 characters ends the paragraph,
#   while a longer one is continued: a space is appended to the buffer
#   unless it already ends in whitespace or one of the hyphen characters
#   listed in the pattern.
#
# NOTE(review): the date-chapter path returns before the character
# counter is reset -- confirm that this is intended.
def send_line_break
  ## After the first period in 'Valid until'
  ## we can go on to the next chapter
  date_complete = @chapter == @date && /\.\s*$/u.match(self.out)
  if date_complete
    add_text
    @chapter = next_chapter
    @section = @chapter.next_section
    @paragraph = @section.next_paragraph
    @out = ''
    return
  end

  if @preformatted
    if detect_tableheader?
      @out = ''
    else
      add_text
      @paragraph << "\n" if @paragraph
    end
  else
    short_line = @chars_since_last_linebreak < 80
    if short_line
      send_paragraph
    elsif /[\s‐­-]$/u.match(self.out).nil?
      self.out << " "
    end
  end
  @chars_since_last_linebreak = 0
end

- (Object) send_page



183
184
185
186
187
188
189
190
191
192
193
194
# File 'ext/fiparse/src/fachinfo_pdf.rb', line 183

# Handles a page change: removes a trailing page number from the buffer,
# flushes what is left and, in preformatted mode, restarts table-header
# detection at line 0.
#
# NOTE(review): the pattern is anchored with "$", which also matches at
# the end of an inner line; everything from the first such match to the
# end of the buffer is cut -- confirm that this is intended.
def send_page
  ## in newer fi-pdfs there is no change of font for
  ## pagenumbers. Here in send_page we can recognize
  ## and delete the page-numbering
  page_number_at = @out.index(/\w+\s+\d+$/u)
  @out.slice!(page_number_at..-1) if page_number_at
  add_text
  @tableheader_lineno = 0 if @preformatted
end

- (Object) send_paragraph



220
221
222
223
224
225
226
227
228
229
# File 'ext/fiparse/src/fachinfo_pdf.rb', line 220

# Ends the current paragraph: flushes the buffer and, unless preformatted
# mode is on, marks the next text as the start of a fresh paragraph.
#
# While the section has no text yet (no first paragraph, or an empty one)
# the break belongs to the subheading instead: a newline is appended to
# it unless it already ends with one.
def send_paragraph
  add_text
  first_paragraph = @section.paragraphs.first
  subheading_open = @section.subheading[-1] != ?\n
  if subheading_open && (first_paragraph.nil? || first_paragraph.empty?)
    @section.subheading << "\n"
  end
  @fresh_paragraph = true unless @preformatted
end