Class: ODDB::ChapterParse::Parser

Inherits:
BasicHtmlParser
Defined in:
ext/chapterparse/src/chaptparser.rb,
ext/chapterparse/test/test_parser.rb,
ext/chapterparse/test/test_integrate.rb

Constant Summary

# Lookup table used by #unknown_charref: keys are decimal character reference
# numbers (as Strings), values are the single-character String that is emitted
# while a ['face', 'Symbol'] font handler is active.
# NOTE(review): the values are presumably the glyph positions in the Symbol
# font encoding -- confirm against the renderer.
# The hash is frozen so the shared table cannot be modified at runtime.
SYMBOL_ENTITIES =
{
  # Symbol
  '913' => 'A',
  '914' => 'B',
  '915' => 'G',
  '916' => 'D',
  '917' => 'E',
  '918' => 'Z',
  '919' => 'H',
  '920' => 'Q',
  '921' => 'I',
  '922' => 'K',
  '923' => 'L',
  '924' => 'M',
  '925' => 'N',
  '926' => 'X',
  '927' => 'O',
  '928' => 'P',
  '929' => 'R',
  '931' => 'S',
  '932' => 'T',
  '933' => 'U',
  '934' => 'F',
  '935' => 'C',
  '936' => 'Y',
  '937' => 'W',
  '945' => 'a',
  '946' => 'b',
  '947' => 'g',
  '948' => 'd',
  '949' => 'e',
  '950' => 'z',
  '951' => 'h',
  '952' => 'q',
  '953' => 'i',
  '954' => 'k',
  '955' => 'l',
  '956' => 'm',
  '957' => 'n',
  '958' => 'x',
  '959' => 'o',
  '960' => 'p',
  '961' => 'r',
  '963' => 's',
  '964' => 't',
  '965' => 'u',
  '966' => 'f',
  '967' => 'c',
  '968' => 'y',
  '969' => 'w',
  '8704'=>  34.chr, # forall
  '8707'=>  36.chr, # exist
  '8727'=>  42.chr, # lowast
  '8722'=>  45.chr, # minus
  '8773'=>  64.chr, # cong
  '8869'=>  94.chr, # perp
  '8764'=> 126.chr, # sim
  '8804'=> 163.chr, # le
  '8734'=> 165.chr, # infin
  '402' => 166.chr, # fnof
  '8596'=> 171.chr, # harr
  '8592'=> 172.chr, # larr
  '8593'=> 173.chr, # uarr
  '8594'=> 174.chr, # rarr
  '8595'=> 175.chr, # darr
  '8805'=> 179.chr, # ge
  '8733'=> 181.chr, # prop
  '8706'=> 182.chr, # part
  '8800'=> 185.chr, # ne
  '8801'=> 186.chr, # equiv
  '8776'=> 187.chr, # asymp
  '8629'=> 191.chr, # crarr
  '8855'=> 196.chr, # otimes
  '8853'=> 197.chr, # oplus
  '8709'=> 198.chr, # empty
  '8745'=> 199.chr, # cap
  '8746'=> 200.chr, # cup
  '8835'=> 201.chr, # sup
  '8839'=> 202.chr, # supe
  '8836'=> 203.chr, # nsub
  '8834'=> 204.chr, # sub
  '8838'=> 205.chr, # sube
  '8712'=> 206.chr, # isin
  '8713'=> 207.chr, # notin
  '8736'=> 208.chr, # ang
  '8711'=> 209.chr, # nabla
  '8719'=> 213.chr, # prod
  '8730'=> 214.chr, # radic
  '8901'=> 215.chr, # sdot
  '8743'=> 217.chr, # and
  '8744'=> 218.chr, # or
  '8660'=> 219.chr, # hArr
  '8656'=> 220.chr, # lArr
  '8657'=> 221.chr, # uArr
  '8658'=> 222.chr, # rArr
  '8659'=> 223.chr, # dArr
  '8721'=> 229.chr, # sum
  '8747'=> 242.chr, # int
}.freeze

Constants inherited from BasicHtmlParser

Entitydefs

Instance Attribute Summary (collapse)

Instance Method Summary (collapse)

Methods inherited from BasicHtmlParser

#end_table, #end_td, #end_tr, #finish_endtag, #start_table, #start_td, #start_tr, #unknown_entityref

Constructor Details

- (Parser) initialize(*args)

A new instance of Parser



109
110
111
112
# File 'ext/chapterparse/src/chaptparser.rb', line 109

# Builds the parser, handing every received argument to the superclass
# initializer, then prepares the per-tag release bookkeeping.
#
# @param args [Array] forwarded unchanged to the superclass initializer
def initialize(*args)
  # Bare `super` (no parentheses) re-sends the arguments exactly as received.
  super
  # One entry per register_release_tag call (made by start_div, start_font and
  # start_span); release_tag pops the entries again.
  @release_stack = []
end

Instance Attribute Details

- (Object) nofill (readonly)

Returns the value of attribute nofill



14
15
16
# File 'ext/chapterparse/test/test_parser.rb', line 14

# Reader for @nofill, the counter that start_pre increments and end_pre
# decrements (end_pre resets it to 0 once it drops to zero or below).
#
# @return [Object] the current value of @nofill
def nofill
  @nofill
end

Instance Method Details

- (Object) analyse_attributes(attrs, release)



113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# File 'ext/chapterparse/src/chaptparser.rb', line 113

# Turns the presentational attributes of a tag into start_* handler calls and
# records the matching end handlers so they can be replayed when the tag closes.
#
# When a 'style' attribute is present it is scanned for monospace/sans-serif,
# bold, italic and vertical-align (super/sub) hints. The 'class' attribute is
# consulted only when there is no 'style' attribute, and only for the word
# "preformatted".
#
# @param attrs   the tag's attribute list, as accepted by fetch_attribute
# @param release [Array<Symbol>] receives the names of the handlers to call
#   when the tag is closed
def analyse_attributes(attrs, release)
  style = fetch_attribute('style', attrs)
  if style
    # Font family: monospace opens a preformatted run, sans-serif suspends one.
    if /\bmono(space)?\b/iu =~ style
      start_pre(attrs)
      release << :end_pre
    elsif /\bsans-serif\b/iu =~ style
      suspend_pre(release)
    end

    if /\bbold\b/iu =~ style
      start_b(attrs)
      release << :end_b
    end

    if /\bitalic\b/iu =~ style
      start_i(attrs)
      release << :end_i
    end

    # Superscript is tested first; subscript only when it did not match.
    if /\bvertical-align\s*:\s*super\b/iu =~ style
      start_sup(attrs)
      release << :end_sup
    elsif /\bvertical-align\s*:\s*sub\b/iu =~ style
      start_sub(attrs)
      release << :end_sub
    end
  else
    klass = fetch_attribute('class', attrs)
    if klass && /\bpreformatted\b/iu =~ klass
      start_pre(attrs)
      release << :end_pre
    end
  end
end

- (Object) end_div



142
143
144
# File 'ext/chapterparse/src/chaptparser.rb', line 142

# Closing handler for a div: replays whatever end handlers the matching
# start_div registered, by delegating to release_tag.
def end_div
  release_tag()
end

- (Object) end_font



145
146
147
# File 'ext/chapterparse/src/chaptparser.rb', line 145

# Closing handler for a font tag: replays the end handlers registered by the
# matching start_font, by delegating to release_tag.
def end_font
  release_tag()
end

- (Object) end_h2



148
149
150
# File 'ext/chapterparse/src/chaptparser.rb', line 148

# Closing handler for an h2: ends the italic run that start_h2 opened via
# start_i.
def end_h2
  end_i()
end

- (Object) end_pre



151
152
153
154
155
156
157
158
# File 'ext/chapterparse/src/chaptparser.rb', line 151

# Leaves one level of preformatted text.
#
# Decrements the @nofill nesting counter; once it reaches zero (or below, in
# which case it is reset to zero) the current paragraph is ended. The font
# pushed by start_pre is popped in every case.
#
# @return whatever @formatter.pop_font returns
def end_pre
  @nofill -= 1
  if @nofill <= 0
    # Outermost level closed: never let the counter go negative.
    @nofill = 0
    @formatter.end_paragraph(1)
  end
  @formatter.pop_font
end

- (Object) end_span



159
160
161
# File 'ext/chapterparse/src/chaptparser.rb', line 159

# Closing handler for a span: replays the end handlers registered by the
# matching start_span, by delegating to release_tag.
def end_span
  release_tag()
end

- (Object) end_sub



162
163
164
# File 'ext/chapterparse/src/chaptparser.rb', line 162

# Ends a subscript run by popping the font handler that start_sub pushed.
#
# @return whatever @formatter.pop_fonthandler returns
def end_sub
  @formatter.pop_fonthandler()
end

- (Object) end_sup



165
166
167
# File 'ext/chapterparse/src/chaptparser.rb', line 165

# Ends a superscript run by popping the font handler that start_sup pushed.
#
# @return whatever @formatter.pop_fonthandler returns
def end_sup
  @formatter.pop_fonthandler()
end

- (Object) fetch_attribute(name, attrs)



168
169
170
171
172
173
174
175
# File 'ext/chapterparse/src/chaptparser.rb', line 168

# Looks up an attribute value by name.
#
# The attribute list is scanned back to front, so when a name occurs more than
# once the last occurrence wins. `reverse_each` is used instead of
# `reverse.each`: it avoids building a reversed copy and, being available on
# every Enumerable, also accepts a Hash (this class itself passes `{}` as an
# attribute collection in restart_pre).
#
# @param name  the attribute name to look for (compared with ==)
# @param attrs [Enumerable] collection of [key, value] pairs
# @return the value paired with the last matching key, or nil when none matches
def fetch_attribute(name, attrs)
  attrs.reverse_each do |key, value|
    return value if key == name
  end
  nil
end

- (Object) register_release_tag(&block)



183
184
185
186
187
# File 'ext/chapterparse/src/chaptparser.rb', line 183

# Opens a new release scope for a tag.
#
# A fresh list is handed to the block, which fills it with the names of the
# handlers to run when the tag closes; the list is then pushed onto
# @release_stack, where release_tag will find it.
#
# @yieldparam release [Array] the (initially empty) list to fill
# @return [Array] @release_stack
def register_release_tag(&block)
  pending = []
  block.call(pending)
  @release_stack << pending
end

- (Object) release_tag



176
177
178
179
180
181
182
# File 'ext/chapterparse/src/chaptparser.rb', line 176

# Closes the most recently opened release scope.
#
# Pops the newest list from @release_stack and invokes each handler named in
# it, in the order the names were recorded. Does nothing when the stack is
# empty.
#
# @return [Array, nil] the popped list, or nil when there was nothing to pop
def release_tag
  pending = @release_stack.pop
  return unless pending

  pending.each { |handler| send(handler) }
end

- (Object) restart_pre



188
189
190
# File 'ext/chapterparse/src/chaptparser.rb', line 188

# Re-enters preformatted mode with an empty attribute collection. suspend_pre
# records this method's name as the release handler after it has interrupted a
# preformatted run.
def restart_pre
  no_attrs = {}
  start_pre(no_attrs)
end

- (Object) start_div(attrs)



191
192
193
194
195
196
# File 'ext/chapterparse/src/chaptparser.rb', line 191

# Opening handler for a div: registers a release scope filled by
# analyse_attributes, then asks the formatter for a line break.
#
# @param attrs the tag's attribute list
# @return whatever @formatter.add_line_break returns
def start_div(attrs)
  register_release_tag { |pending| analyse_attributes(attrs, pending) }
  @formatter.add_line_break
end

- (Object) start_font(attrs)



197
198
199
200
201
202
203
204
205
206
207
208
# File 'ext/chapterparse/src/chaptparser.rb', line 197

# Opening handler for a font tag.
#
# Registers a release scope and inspects the 'face' attribute: a monospace face
# opens a preformatted run (closed again by :end_pre), a sans-serif face
# suspends a running one via suspend_pre. Any other face, or none, leaves the
# scope empty.
#
# @param attrs the tag's attribute list
def start_font(attrs)
  register_release_tag do |pending|
    face = fetch_attribute('face', attrs)
    if face
      if /\bmono(space)?\b/iu =~ face
        start_pre(attrs)
        pending << :end_pre
      elsif /\bsans-serif\b/iu =~ face
        suspend_pre(pending)
      end
    end
  end
end

- (Object) start_h2(attrs)



209
210
211
# File 'ext/chapterparse/src/chaptparser.rb', line 209

# Opening handler for an h2: delegates to start_i with the same attributes.
#
# @param attrs the tag's attribute list, passed on unchanged
def start_h2(attrs)
  start_i(attrs)
end

- (Object) start_pre(attrs)



212
213
214
215
216
217
218
# File 'ext/chapterparse/src/chaptparser.rb', line 212

# Enters one level of preformatted text.
#
# When no preformatted run is active yet (@nofill is zero or less) the current
# paragraph is ended first. A font is then pushed on the formatter and the
# @nofill nesting counter is incremented.
#
# @param attrs the tag's attribute list (not used here)
# @return the new value of @nofill
def start_pre(attrs)
  @formatter.end_paragraph(1) if @nofill <= 0
  @formatter.push_font(nil, nil, nil, 1)
  @nofill += 1
end

- (Object) start_span(attrs)



219
220
221
222
223
# File 'ext/chapterparse/src/chaptparser.rb', line 219

# Opening handler for a span: registers a release scope that
# analyse_attributes fills from the tag's attributes.
#
# @param attrs the tag's attribute list
def start_span(attrs)
  register_release_tag { |pending| analyse_attributes(attrs, pending) }
end

- (Object) start_sub(attrs)



224
225
226
# File 'ext/chapterparse/src/chaptparser.rb', line 224

# Starts a subscript run by pushing a vertical-align font handler on the
# formatter.
#
# @param attrs the tag's attribute list (not used here)
def start_sub(attrs)
  handler = [['vertical-align', 'subscript']]
  @formatter.push_fonthandler(handler)
end

- (Object) start_sup(attrs)



227
228
229
# File 'ext/chapterparse/src/chaptparser.rb', line 227

# Starts a superscript run by pushing a vertical-align font handler on the
# formatter.
#
# @param attrs the tag's attribute list (not used here)
def start_sup(attrs)
  handler = [['vertical-align', 'superscript']]
  @formatter.push_fonthandler(handler)
end

- (Object) suspend_pre(release)



230
231
232
233
234
235
# File 'ext/chapterparse/src/chaptparser.rb', line 230

# Temporarily leaves preformatted mode.
#
# Only acts while a preformatted run is active (@nofill above zero): the run is
# ended with end_pre and :restart_pre is recorded so that it is resumed when
# the enclosing tag is released.
#
# @param release [Array<Symbol>] the release list of the enclosing tag
# @return [Array, nil] the release list when a run was suspended, otherwise nil
def suspend_pre(release)
  return unless @nofill > 0

  end_pre
  release << :restart_pre
end

- (Object) unknown_charref(ref)



236
237
238
239
240
241
242
243
244
# File 'ext/chapterparse/src/chaptparser.rb', line 236

# Handles a numeric character reference.
#
# References listed in SYMBOL_ENTITIES are emitted as their mapped character
# while a ['face', 'Symbol'] font handler is active; every other reference is
# emitted as a literal question mark.
#
# @param ref the reference, used as a key into SYMBOL_ENTITIES
def unknown_charref(ref)
  glyph = SYMBOL_ENTITIES[ref]
  return handle_data("?") unless glyph

  @formatter.push_fonthandler([['face', 'Symbol']])
  handle_data(glyph)
  @formatter.pop_fonthandler
end