Class: ODDB::AnalysisParse::IndexFinder

Inherits:
PageHandler show all
Defined in:
ext/analysisparse/src/pagehandler.rb

Instance Attribute Summary (collapse)

Instance Method Summary (collapse)

Methods inherited from PageHandler

#analyze

Instance Attribute Details

- (Object) index (readonly)

Returns the value of attribute index



119
120
121
# File 'ext/analysisparse/src/pagehandler.rb', line 119

# Read-only accessor for the @index instance variable.
#
# Within this class, next_pagehandler lazily initialises @index to a Hash and
# find_subchapters stores entries into it keyed by an Integer (taken from the
# second regexp capture) with the stripped first capture as the value.
#
# NOTE(review): no initialiser is visible here, so this presumably returns
# nil until next_pagehandler has run -- confirm against the constructor.
def index
  @index
end

Instance Method Details

- (Object) find_subchapters(pattern, txt)



120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# File 'ext/analysisparse/src/pagehandler.rb', line 120

# Scans +txt+ piece by piece and records every entry matched by +pattern+
# in @index.
#
# Each piece is stripped and appended (after a single space) to every pending
# accumulator string; a fresh, empty accumulator is opened for every piece.
# That way an entry which is wrapped over several lines can still match once
# its last line has been appended. When an accumulator matches and capture 1
# is not blank, the entry is stored as
# <tt>@index[capture 2 as Integer] = stripped capture 1</tt> and the list of
# accumulators is reset.
#
# pattern:: Regexp with (at least) two capture groups: 1 = title, 2 = number.
# txt::     a String (walked line by line) or any object whose +each+ yields
#           strings (e.g. an Array of lines).
#
# Returns whatever the underlying iterator returns (+txt+ itself for String
# and Array receivers).
def find_subchapters(pattern, txt)
  # Callers normally prepare @index first; guard so a direct call works too.
  @index ||= {}
  # String#each only exists in Ruby 1.8. String#each_line is available in
  # every Ruby version and behaves the same there; receivers without
  # each_line (Array and friends) keep using each.
  iterator = txt.respond_to?(:each_line) ? :each_line : :each
  lines = []
  txt.send(iterator) { |part|
    # dup: the accumulator is mutated with << below, so it must not be a
    # frozen literal.
    lines << ''.dup
    lines.each { |line|
      line << ' ' << part.strip
      match = pattern.match(line)
      next unless match
      title = match[1].strip
      next if title == ''
      @index.store(match[2].to_i, title)
      # Rebinding +lines+ only affects the following pieces: the walk that is
      # currently in progress still visits the remaining old accumulators.
      lines = []
    }
  }
end

- (Object) next_pagehandler(txt)



135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
# File 'ext/analysisparse/src/pagehandler.rb', line 135

# Decides which handler processes the following page.
#
# If +txt+ contains "vorbemerkungen" or "remarques préliminaires"
# (case-insensitive) and @index already holds entries, a new IndexHandler
# built from @index is returned. Otherwise +txt+ is treated as a
# table-of-contents page: its apostrophe stand-ins are normalised, every
# known entry pattern is run through find_subchapters, the collected titles
# are tidied up and +self+ is returned so this handler stays active.
#
# Note that +txt+ is modified in place by the gsub! calls.
def next_pagehandler(txt)
  @index ||= {}

  preface = /vorbemerkungen/iu.match(txt) \
    || /remarques\s*préliminaires/iu.match(txt)
  return IndexHandler.new(@index) if preface && !@index.empty?

  # "~R" and the \302\222 byte pair are both rewritten to a plain apostrophe.
  [/~R/u, /\302\222/u].each { |stray| txt.gsub!(stray, "'") }

  # Entry patterns, applied in exactly this order. Capture 1 is the title,
  # capture 2 the number that find_subchapters uses as the @index key.
  entry_patterns = [
    /^\s*\d\.\s+(.*?)\.*\s*(\d*)/u,
    /^\s*\d\.\s*([^\d]+?)\..+?(\d+)/u,
    # "kapitel" / "anhang" headings
    /^\s*\d\.\s*kapitel\s*:\s*(.*?)\s*[\d\.]{2,7}\s*.*?\.*\s*(\d+)\s*/imu,
    /\s*4\.\d\s*([\w\säöü\-\']+)\.*\s*?(\d{3})\s*?/iu,
    /^\s*5\.\d\s*anhang\s*[ABC]\s*(.*?)\s*\.*\s*(\d+)\s*$/u,
    /^\s*5\.\d\s*anhang\s*[ABC]\s*(.*?)[\d\.]{5,7}\s*.*?\.*\s*(\d+)\s*$/imu,
    # "chapitre" / "annexe" headings
    /^\s*chapitre\s*\d:\s*(.*?)\s*[\d\.]{2,7}\s*.*?\.*\s*(\d+)\s*/imu,
    /\s*4\.\d\s*([\w\s\302\222éèà]+)\.*\s*(\d+)\s*/iu,
    /^\s*5\.\d\s*annexe\s*A\s*:\s*(.*?)\s*[\d\.]{5,7}\s*.*?\.*\s*(\d+)/imu,
    /^\s*5\.\d\s*annexe\s*[BC]\s*:\s*(.*?)\.*\s*(\d+)/iu
  ]
  entry_patterns.each { |entry_pattern| find_subchapters(entry_pattern, txt) }

  # Clean-up passes over the stored titles, one full pass per rule:
  # first collapse whitespace around slashes, then replace any remaining
  # \302\222 byte pair with an apostrophe. gsub! edits the stored strings
  # in place, so no reassignment is needed.
  [[/\s*\/\s*/u, '/'], [/\302\222/, "'"]].each { |needle, substitute|
    @index.each_value { |title| title.gsub!(needle, substitute) }
  }

  self
end