Class: BibliographicFileProcessor

Inherits:
Object
  • Object
show all
Defined in:
lib/bibliographic_file_processor.rb

Overview

Searches based on bibliography files, such as BibTeX, must be processed to obtain all the necessary information. This class retrieves records and references using the file attached to a search.

The main methods are #process_file and #process_canonical_documents. Canonical documents are processed only if the file could be processed.

In pseudocode, the structure is

process_file()
  .get_integrator()
    for each reference in integrator
      .process_reference()
    add integrator references as records on search
process_canonical_documents
 for each record on search
   create a canonical document

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(search) ⇒ BibliographicFileProcessor

Returns a new instance of BibliographicFileProcessor.



53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# File 'lib/bibliographic_file_processor.rb', line 53

# Builds the processor and immediately runs the whole import for +search+.
#
# #process_file runs first; #process_canonical_documents is attempted only
# when it returns a truthy value. An error raised while processing canonical
# documents is logged through #log_error and stored in @error instead of
# being propagated to the caller.
#
# @param search the search whose attached file is processed
def initialize(search)
  @search = search
  @result = Result.new
  @error = nil
  @canonical_document_processed = false
  return unless process_file

  begin
    process_canonical_documents
  rescue StandardError => e
    # StandardError, not Exception: interrupts, exit requests and
    # out-of-memory conditions must not be turned into an import error.
    log_error('bibliographic_file_processor.error_processing_canonical_documents', e.message)
    @error = "#{I18n::t('bibliographic_file_processor.error_processing_canonical_documents')} #{e.message}"
  end
end

Instance Attribute Details

#canonical_document_processedObject (readonly)

Returns the value of attribute canonical_document_processed.



52
53
54
# File 'lib/bibliographic_file_processor.rb', line 52

# Reader for @canonical_document_processed: false when the constructor
# starts, set to true at the end of #process_canonical_documents.
def canonical_document_processed
  @canonical_document_processed
end

#errorObject (readonly)

Returns the value of attribute error.



51
52
53
# File 'lib/bibliographic_file_processor.rb', line 51

# Reader for @error: nil when the constructor starts, replaced by a
# translated message when one of the processing steps reports a failure.
def error
  @error
end

#resultObject (readonly)

Returns the value of attribute result.



50
51
52
# File 'lib/bibliographic_file_processor.rb', line 50

# Reader for @result, the Result object created in the constructor that
# #log_error and #log_success write their messages to.
def result
  @result
end

#searchObject (readonly)

Returns the value of attribute search.



49
50
51
# File 'lib/bibliographic_file_processor.rb', line 49

# Reader for @search, the search object handed to the constructor.
def search
  @search
end

Instance Method Details

#create_hash_update(fields, record) ⇒ Object



170
171
172
173
174
# File 'lib/bibliographic_file_processor.rb', line 170

# Builds a Hash that maps every name in +fields+ to the value +record+
# returns when that name is sent to it as a message.
#
# @param fields list of attribute names
# @param record object answering each of those names
# @return [Hash] name => value, in the order given by +fields+
def create_hash_update(fields, record)
  fields.each_with_object({}) do |field, values|
    values[field] = record.send(field)
  end
end

#get_integratorObject

Factory method to retrieve an integrator.

For BibTeX, ReferenceIntegrator::BibTex::Reader takes control and decides how to process the file using BibTeX fields. For CSV, we need to send the bibliographic database.

See Also:

  • ReferenceIntegrator::BibTex
  • ReferenceIntegrator::CSV


183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
# File 'lib/bibliographic_file_processor.rb', line 183

# Factory method: picks the reader matching the file attached to the search
# and returns whatever that reader's +parse+ returns.
#
# The reader is chosen from @search[:filename] and @search[:filetype], in
# this order: RIS, JSON, BibTeX, PubMed/nbib, CSV. When there is no file
# body, no matching reader, or the reader fails with a rescued error, the
# problem is logged through #log_error and +false+ is returned. Reader
# failures additionally store a translated message in @error.
#
# @return the parsed integrator, or +false+ when none could be built
def get_integrator
  body = @search[:file_body]
  filename = @search[:filename]
  filetype = @search[:filetype]

  if body.nil?
    log_error('bibliographic_file_processor.no_file_available')
    false
  elsif filename =~ /\.ris$/
    begin
      BibliographicalImporter::Ris::Reader.parse(body)
    rescue StandardError => e
      # StandardError, not Exception, so interrupts and exit requests propagate.
      integrator_failed('bibliographic_file_processor.ris_integrator_failed', e)
    end
  elsif filetype == 'application/json' || filename =~ /\.json$/
    begin
      BibliographicalImporter::Json::Reader.parse(body)
    rescue StandardError => e
      # This branch used to rescue only BibTeX::ParseError, so a malformed
      # JSON file escaped this method (and the constructor) as an exception.
      # NOTE(review): narrow this once the error class raised by the JSON
      # reader is confirmed.
      integrator_failed('bibliographic_file_processor.json_integrator_failed', e)
    end
  elsif filetype == 'text/x-bibtex' || filename =~ /\.bib$/ || filename =~ /\.bibtex$/
    # Work on a copy: re-tagging and scrubbing must not alter the string
    # held by the search.
    text = body.dup.force_encoding('utf-8')
    # Quick fix: every byte sequence that is not valid UTF-8 becomes "*".
    text.scrub!('*') unless text.valid_encoding?
    begin
      BibliographicalImporter::BibTex::Reader.parse(text)
    rescue BibTeX::ParseError => e
      integrator_failed('bibliographic_file_processor.bibtex_integrator_failed', e)
    end
  elsif filetype == 'application/nbib' || filetype == 'application/x-pubmed' || filename =~ /\.nbib$/
    begin
      BibliographicalImporter::PubmedSummary::Reader.parse(body)
    rescue PubmedSummary::ParseError => e
      integrator_failed('bibliographic_file_processor.pubmed_integrator_failed', e)
    end
  elsif filetype == 'text/csv' # Work in progress
    # The CSV reader also needs the name of the bibliographic database.
    BibliographicalImporter::CSV::Reader.parse(body, @search.bibliographical_database_name)
  else
    log_error('bibliographic_file_processor.no_integrator_for_filetype')
    false
  end
end

# Records a reader failure: logs it under +key+ with the error class and
# message, stores the translated +key+ in @error, and returns false so the
# caller can use it as the branch value.
def integrator_failed(key, error)
  log_error(key, "<#{error.class}> : #{error.message}")
  @error = ::I18n::t(key.to_sym)
  false
end
private :integrator_failed

#log_error(message, extra_info = nil) ⇒ Object



69
70
71
# File 'lib/bibliographic_file_processor.rb', line 69

# Adds an error entry to @result. The entry is the translation of +message+,
# followed by the id of the current search and the optional +extra_info+.
#
# @param message translation key
# @param extra_info optional detail appended to the entry
def log_error(message, extra_info=nil)
  translated = ::I18n.t(message)
  search_id = @search[:id]
  @result.error("#{translated}: ID #{search_id} #{extra_info}")
end

#log_success(message, extra_info = nil) ⇒ Object



73
74
75
# File 'lib/bibliographic_file_processor.rb', line 73

# Adds a success entry to @result. The entry is the translation of
# +message+, followed by the id of the current search and the optional
# +extra_info+.
#
# @param message translation key
# @param extra_info optional detail appended to the entry
def log_success(message, extra_info=nil)
  translated = ::I18n.t(message)
  search_id = @search[:id]
  @result.success("#{translated}:ID #{search_id} #{extra_info}")
end

#process_canonical_documentsObject



129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
# File 'lib/bibliographic_file_processor.rb', line 129

# Links every record of the search to a canonical document.
#
# For a record without a canonical document id:
# 1. look for an existing canonical document by DOI (only when the DOI
#    contains "10.");
# 2. otherwise look for one by PubMed id, when the record has one;
# 3. otherwise insert a new canonical document built from the record fields
#    plus the "<database name>_id" column set to the record uid;
# then store the canonical document id on the record.
#
# A record that already has a canonical document id is handed to
# #update_cd_fields instead.
#
# On completion @canonical_document_processed is set to true and a success
# entry with the record count is logged.
def process_canonical_documents
  # The field list is the same for every record, so it is built only once.
  fields = [:title, :author, :year, :journal, :volume, :pages, :doi, :journal_abbr, :abstract, :pubmed_id,
            :wos_id, :scopus_id, :scielo_id]
  bb = BibliographicDatabase.id_a_name_h

  # NOTE(review): if $db is a Sequel database, its documented values for the
  # :rollback option are :reraise and :always - confirm :rollback is intended.
  $db.transaction(:rollback => :rollback) do
    @search.records.each do |record|
      registro_base_id = "#{bb[record.bibliographic_database_id]}_id".to_sym

      unless record[:canonical_document_id].nil?
        update_cd_fields(fields, record, registro_base_id)
        next
      end

      can_doc = nil
      # Check whether a canonical document with this DOI already exists.
      if record[:doi].to_s =~ /10\./
        can_doc = CanonicalDocument[:doi => record[:doi]]
      end
      if can_doc.nil? && !record[:pubmed_id].nil?
        can_doc = CanonicalDocument[:pubmed_id => record[:pubmed_id]]
      end

      if can_doc.nil?
        # The field hash is only needed when a new document is inserted.
        fields_to_update = create_hash_update(fields, record).merge({registro_base_id => record[:uid]})
        # Workaround kept from the original: a missing year is stored as 0.
        fields_to_update[:year] = 0 if fields_to_update[:year].nil?
        can_doc_id = CanonicalDocument.insert(fields_to_update)
        can_doc = CanonicalDocument[:id => can_doc_id]
      end
      record.update(:canonical_document_id => can_doc[:id])
    end
  end # db.transaction
  @canonical_document_processed = true
  log_success('bibliographic_file_processor.Search_canonical_documents_successfully', "#{I18n::t(:Count_canonical_documents)} : #{@search.records.count}")
end

#process_fileObject



78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# File 'lib/bibliographic_file_processor.rb', line 78

# Reads the file attached to the search and stores its references as
# records of the search.
#
# Steps:
# 1. obtain an integrator from #get_integrator; return false when none is
#    available or a BibTeX parse error is raised;
# 2. inside a database transaction, resolve each reference's type to a
#    bibliographic database id and pass it to #process_reference;
# 3. attach the collected record ids to the search with update_records.
#
# A nil reference or a failed #process_reference marks the run as incorrect
# but processing continues. An unknown reference type marks it as incorrect
# and stops the loop. In both cases the ids collected so far are still
# attached to the search.
#
# @return [Boolean] false when no integrator could be obtained; true
#   otherwise, even when some references failed (@error is set in that case)
def process_file
  begin
    integrator = get_integrator
    return false unless integrator
  rescue BibTeX::ParseError => e
    log_error('bibliographic_file_processor.error_parsing_file', e.message)
    @error = "#{I18n::t('bibliographic_file_processor.error_parsing_file')} #{e.message}"
    return false
  end

  correct = true
  $db.transaction do
    bb = BibliographicDatabase.name_a_id_h
    ref_ids = []
    ref_i = 0
    integrator.each do |reference|
      ref_i += 1
      if reference.nil?
        @result.error(::I18n::t('bibliographic_file_processor.error_on_reference', i: ref_i))
        correct = false
        next
      end

      database_name = reference.type.to_s
      bb_id = bb[database_name]
      if bb_id.nil?
        # Report the name that could not be resolved; the looked-up id is
        # always nil on this branch and carried no information.
        @result.error(::I18n::t('bibliographic_file_processor.no_unique_id_for_integrator', integrator: database_name))
        correct = false
        break
      end

      reg_o = process_reference(bb_id, reference)
      if reg_o.is_a? Result
        # process_reference hands back a Result when it failed.
        @result.add_result(reg_o)
        correct = false
      else
        ref_ids.push(reg_o[:id])
      end
    end
    @search.update_records(ref_ids)
  end

  if correct
    log_success('bibliographic_file_processor.Search_process_file_successfully')
  else
    @error = ::I18n::t('bibliographic_file_processor.Search_process_file_error')
    log_error('bibliographic_file_processor.Search_process_file_error')
  end
  true
end

#process_reference(bb_id, reference) ⇒ Object



264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
# File 'lib/bibliographic_file_processor.rb', line 264

# Stores one reference as a Record and returns that record.
#
# The record is looked up by the reference uid and +bb_id+, and inserted
# when it does not exist yet. Only fields that are still nil on the record
# are filled from the reference; a reference value equal to '' is skipped.
# String values are converted to valid UTF-8. When the reference lists cited
# references, their ids are resolved through #get_cit_refs_ids and linked to
# the record.
#
# @param bb_id id of the bibliographic database the reference comes from
# @param reference object answering uid, cited_references and one reader
#   per field in the list below
# @return the record, or a Result holding the error message when updating
#   the record failed
def process_reference(bb_id, reference)
  reg_o = Record[:uid => reference.uid, :bibliographic_database_id => bb_id]

  if reg_o.nil?
    reg_o_id = Record.insert(:uid => reference.uid, :bibliographic_database_id => bb_id)
    reg_o = Record[reg_o_id]
  end

  fields = [:title, :author, :year, :journal, :volume, :pages, :doi, :journal_abbr, :abstract, :pubmed_id,
            :wos_id, :scopus_id, :scielo_id]

  fields_update = fields.each_with_object({}) do |field, values|
    # Test the field being processed. The previous code always tested the
    # literal key :field, so values already stored were overwritten.
    next unless reg_o[field].nil?

    value = reference.send(field)
    next if value == ''

    # Only strings carry an encoding; anything else (including nil) is
    # passed through untouched instead of failing on #encode.
    values[field] =
      if value.is_a?(String)
        value.encode("utf-8", invalid: :replace, undef: :replace, replace: '_').force_encoding("utf-8").scrub
      else
        value
      end
  end

  begin
    reg_o.update(fields_update)
  rescue StandardError => e
    # StandardError, not Exception, so interrupts and exit requests propagate.
    $log.error(fields_update)
    message = "process_reference error:  #{reference.uid}, #{e.message}"
    $log.error(message)
    result = Result.new
    result.error(message)
    return result
  end

  # Process the cited references
  cited_references = reference.cited_references
  unless cited_references.nil?
    cit_refs_ids = get_cit_refs_ids(cited_references)
    reg_o.update_references(cit_refs_ids)
  end
  reg_o
end