Class: BibliographicFileProcessor

Inherits:

Object

Object
BibliographicFileProcessor

show all

Defined in:: lib/bibliographic_file_processor.rb

Overview

Searches based on bibliography files, such as BibTeX, must be processed to obtain all the necessary information. This class retrieves records and references using the file attached to a search.

The main methods are #process_file and #process_canonical_documents. Canonical documents are processed only if the file can be processed.

In pseudocode, the structure is

process_file()
  .get_integrator()
    for each reference in integrator
      .process_reference()
    add integrator references as records on search
process_canonical_documents
 for each record on search
   create a canonical document

Instance Attribute Summary collapse

#canonical_document_processed ⇒ Object readonly

Returns the value of attribute canonical_document_processed.
#error ⇒ Object readonly

Returns the value of attribute error.
#result ⇒ Object readonly

Returns the value of attribute result.
#search ⇒ Object readonly

Returns the value of attribute search.

Instance Method Summary collapse

#create_hash_update(fields, record) ⇒ Object
#get_integrator ⇒ Object

Factory method to retrieve an integrator.
#initialize(search) ⇒ BibliographicFileProcessor constructor

A new instance of BibliographicFileProcessor.
#log_error(message, extra_info = nil) ⇒ Object
#log_success(message, extra_info = nil) ⇒ Object
#process_canonical_documents ⇒ Object
#process_file ⇒ Object
#process_reference(bb_id, reference) ⇒ Object

Constructor Details

#initialize(search) ⇒ `BibliographicFileProcessor`

Returns a new instance of BibliographicFileProcessor.

# File 'lib/bibliographic_file_processor.rb', line 53

# Builds the processor and immediately runs the whole pipeline for +search+:
# #process_file first and, only when it returns a truthy value,
# #process_canonical_documents.
#
# A failure while processing canonical documents is not raised to the caller:
# it is logged on @result and stored as a translated message in @error.
#
# @param search the search whose attached file is processed; it is read with
#   [] and through #records / #update_records by #process_file and
#   #process_canonical_documents
def initialize(search)
  @search = search
  @result = Result.new
  @error = nil
  @canonical_document_processed = false
  return unless process_file

  begin
    process_canonical_documents
  rescue StandardError => e
    # StandardError (not Exception) so that Interrupt, SystemExit and other
    # process-level exceptions are not silently swallowed by the constructor.
    log_error('bibliographic_file_processor.error_processing_canonical_documents', e.message)
    @error = "#{I18n::t('bibliographic_file_processor.error_processing_canonical_documents')} #{e.message}"
  end
end

Instance Attribute Details

#canonical_document_processed ⇒ `Object` (readonly)

Returns the value of attribute canonical_document_processed.



52
53
54

# File 'lib/bibliographic_file_processor.rb', line 52

# Reader for the flag that tells whether canonical documents were processed.
# The constructor initialises it to false and #process_canonical_documents
# sets it to true once it has finished its transaction.
#
# @return [Boolean]
def canonical_document_processed
  @canonical_document_processed
end

#error ⇒ `Object` (readonly)

Returns the value of attribute error.



51
52
53

# File 'lib/bibliographic_file_processor.rb', line 51

# Reader for the last error message.
# It is nil after construction and is replaced by a translated message when
# parsing the file, processing references or processing canonical documents
# fails.
#
# @return [String, nil]
def error
  @error
end

#result ⇒ `Object` (readonly)

Returns the value of attribute result.



50
51
52

# File 'lib/bibliographic_file_processor.rb', line 50

# Reader for the Result object created in the constructor; #log_error and
# #log_success append their messages to it.
#
# @return [Result]
def result
  @result
end

#search ⇒ `Object` (readonly)

Returns the value of attribute search.



49
50
51

# File 'lib/bibliographic_file_processor.rb', line 49

# Reader for the search object that was passed to the constructor.
#
# @return [Object]
def search
  @search
end

Instance Method Details

#create_hash_update(fields, record) ⇒ `Object`

# File 'lib/bibliographic_file_processor.rb', line 170

# Builds a hash with one entry per field, whose value is obtained by calling
# the method of that name on +record+.
#
# @param fields [Array<Symbol>] names of the methods to call on +record+
# @param record [Object] object that responds to every name in +fields+
# @return [Hash] field name => value returned by +record+
def create_hash_update(fields, record)
  fields.each_with_object({}) do |field_name, values|
    values[field_name] = record.send(field_name)
  end
end

#get_integrator ⇒ `Object`

Factory method to retrieve an integrator.

For BibTeX, ReferenceIntegrator::BibTex::Reader takes control and decides how to process the file using BibTeX fields. For CSV, we need to send the bibliographic database.

See Also:

ReferenceIntegrator::BibTex
ReferenceIntegrator::CSV

# File 'lib/bibliographic_file_processor.rb', line 183

# Factory method: chooses a reader for the file attached to the search and
# returns whatever that reader's +parse+ returns.
#
# The branches are tested in this order:
# * no @search[:file_body]                          -> logs and returns false
# * filename ends in .ris                           -> RIS reader
# * filetype application/json or filename .json     -> JSON reader
# * filetype text/x-bibtex or filename .bib/.bibtex -> BibTeX reader
# * filetype application/nbib, application/x-pubmed
#   or filename .nbib                               -> PubMed summary reader
# * filetype text/csv                               -> CSV reader
# * anything else                                   -> logs and returns false
#
# A failure of the RIS, JSON, BibTeX or PubMed reader is logged on @result,
# stored as a translated message in @error and reported as false. Errors
# raised by the CSV reader are not rescued here.
#
# @return [Object, false] the parsed integrator, or false when there is no
#   file, no reader for the file type, or the reader failed
def get_integrator
  if @search[:file_body].nil?
    log_error('bibliographic_file_processor.no_file_available')
    false
  elsif @search[:filename] =~ /\.ris$/
    begin
      BibliographicalImporter::Ris::Reader.parse(@search[:file_body])
    rescue StandardError => e
      # StandardError (not Exception) so interrupts and exits still propagate.
      integrator_parse_failed('bibliographic_file_processor.ris_integrator_failed', e)
    end
  elsif @search[:filetype] == 'application/json' || @search[:filename] =~ /\.json$/
    begin
      BibliographicalImporter::Json::Reader.parse(@search[:file_body])
    rescue StandardError => e
      # The JSON reader does not raise BibTeX::ParseError, so rescuing only
      # that class let every JSON failure escape this method.
      integrator_parse_failed('bibliographic_file_processor.json_integrator_failed', e)
    end
  elsif @search[:filetype] == 'text/x-bibtex' || @search[:filename] =~ /\.bib$/ || @search[:filename] =~ /\.bibtex$/
    file_body = @search[:file_body].force_encoding("utf-8")
    # Quick fix: replace every byte sequence that is not valid UTF-8 with "*".
    file_body.scrub!("*") unless file_body.valid_encoding?
    begin
      BibliographicalImporter::BibTex::Reader.parse(file_body)
    rescue BibTeX::ParseError => e
      integrator_parse_failed('bibliographic_file_processor.bibtex_integrator_failed', e)
    end
  elsif @search[:filetype] == 'application/nbib' || @search[:filetype] == 'application/x-pubmed' || @search[:filename] =~ /\.nbib$/
    begin
      BibliographicalImporter::PubmedSummary::Reader.parse(@search[:file_body])
    rescue PubmedSummary::ParseError => e
      integrator_parse_failed('bibliographic_file_processor.pubmed_integrator_failed', e)
    end
  elsif @search[:filetype] == 'text/csv' # TODO: work in progress
    BibliographicalImporter::CSV::Reader.parse(@search[:file_body], @search.bibliographical_database_name)
  else
    log_error('bibliographic_file_processor.no_integrator_for_filetype')
    false
  end
end

# Records a reader failure: logs it with the exception class and message,
# stores the translated message in @error and returns false so that the
# caller can use it directly as the result of #get_integrator.
#
# @param key [String] I18n key that describes the failure
# @param exception [StandardError] the error raised by the reader
# @return [false]
def integrator_parse_failed(key, exception)
  log_error(key, "<#{exception.class}> : #{exception.message}")
  @error = ::I18n::t(key.to_sym)
  false
end

#log_error(message, extra_info = nil) ⇒ `Object`



69
70
71

# File 'lib/bibliographic_file_processor.rb', line 69

# Adds an error entry to @result. The entry is the translation of +message+,
# followed by the id of the search and the optional extra information.
#
# @param message [String, Symbol] I18n key of the message
# @param extra_info [Object, nil] extra text appended to the entry
# @return [Object] whatever @result.error returns
def log_error(message, extra_info=nil)
  translated = ::I18n::t(message)
  search_id = @search[:id]
  entry = "#{translated}: ID #{search_id} #{extra_info}"
  @result.error(entry)
end

#log_success(message, extra_info = nil) ⇒ `Object`



73
74
75

# File 'lib/bibliographic_file_processor.rb', line 73

# Adds a success entry to @result. The entry is the translation of +message+,
# followed by the id of the search and the optional extra information.
#
# @param message [String, Symbol] I18n key of the message
# @param extra_info [Object, nil] extra text appended to the entry
# @return [Object] whatever @result.success returns
def log_success(message, extra_info=nil)
  translated = ::I18n::t(message)
  search_id = @search[:id]
  entry = "#{translated}:ID #{search_id} #{extra_info}"
  @result.success(entry)
end

#process_canonical_documents ⇒ `Object`

# File 'lib/bibliographic_file_processor.rb', line 129

# Links every record of the search to a canonical document, inside a single
# $db transaction.
#
# For a record without canonical_document_id:
# 1. look for a CanonicalDocument with the same DOI (only when the DOI
#    contains "10."),
# 2. otherwise look for one with the same pubmed_id (when the record has one),
# 3. otherwise insert a new CanonicalDocument built from the record's fields
#    plus the "<database name>_id" column set to the record's uid,
# and then store the canonical document id on the record.
#
# A record that already has a canonical_document_id is handed to
# update_cd_fields instead.
#
# After the transaction the processed flag is set and a success entry with
# the number of records is logged.
def process_canonical_documents
  database_names = BibliographicDatabase.id_a_name_h
  $db.transaction(:rollback => :rollback) do
    @search.records.each do |record|
      fields = %i[title author year journal volume pages doi journal_abbr abstract pubmed_id
                  wos_id scopus_id scielo_id]

      fields_update = create_hash_update(fields, record)
      registro_base_id = "#{database_names[record.bibliographic_database_id]}_id".to_sym

      unless record[:canonical_document_id].nil?
        update_cd_fields(fields, record, registro_base_id)
        next
      end

      # Check whether the record has a DOI we can match on.
      can_doc = CanonicalDocument[:doi => record[:doi]] if record[:doi].to_s =~ /10\./
      if can_doc.nil? && !record[:pubmed_id].nil?
        can_doc = CanonicalDocument[:pubmed_id => record[:pubmed_id]]
      end

      if can_doc.nil?
        new_values = fields_update.merge({registro_base_id => record[:uid]})
        # A very ugly fix: a missing year is stored as 0. Maybe the canonical
        # document should simply allow year=nil.
        new_values[:year] = 0 if new_values[:year].nil?
        can_doc = CanonicalDocument[:id => CanonicalDocument.insert(new_values)]
      end

      record.update(:canonical_document_id => can_doc[:id])
    end
  end # db.transaction
  @canonical_document_processed = true
  records_summary = "#{I18n::t(:Count_canonical_documents)} : #{@search.records.count}"
  log_success('bibliographic_file_processor.Search_canonical_documents_successfully', records_summary)
end

#process_file ⇒ `Object`

# File 'lib/bibliographic_file_processor.rb', line 78

# Parses the file attached to the search and stores every reference as a
# record of the search.
#
# Steps:
# 1. Obtain the integrator with #get_integrator; stop and return false when
#    there is none or a BibTeX::ParseError is raised.
# 2. Inside a $db transaction, walk the references: a nil reference is
#    reported and skipped; a reference whose type has no bibliographic
#    database id is reported and stops the walk; every other reference goes
#    through #process_reference.
# 3. Attach the ids of the processed records to the search with
#    update_records (also when the walk stopped early).
#
# @return [Boolean] false when no integrator could be obtained; true
#   otherwise, even if some references failed (in that case @error is set and
#   the failures are in @result)
def process_file
  begin
    integrator = get_integrator
    return false unless integrator
  rescue BibTeX::ParseError => e
    log_error('bibliographic_file_processor.error_parsing_file', e.message)
    @error = "#{I18n::t('bibliographic_file_processor.error_parsing_file')} #{e.message}"
    return false
  end

  correct = true
  $db.transaction do
    bb = BibliographicDatabase.name_a_id_h
    ref_ids = []
    ref_i = 0
    integrator.each do |reference|
      ref_i += 1
      if reference.nil?
        @result.error(::I18n::t('bibliographic_file_processor.error_on_reference', i: ref_i))
        correct = false
        next
      end

      reference_type = reference.type.to_s
      bb_id = bb[reference_type]
      if bb_id.nil?
        # Report the type that could not be resolved; bb_id itself is always
        # nil at this point and would make the message useless.
        @result.error(::I18n::t('bibliographic_file_processor.no_unique_id_for_integrator', integrator: reference_type))
        correct = false
        break
      end

      reg_o = process_reference(bb_id, reference)
      if reg_o.is_a? Result
        # process_reference returns a Result only when it failed.
        @result.add_result(reg_o)
        correct = false
      else
        ref_ids.push(reg_o[:id])
      end
    end
    @search.update_records(ref_ids)
  end

  if correct
    log_success('bibliographic_file_processor.Search_process_file_successfully')
  else
    @error = ::I18n::t('bibliographic_file_processor.Search_process_file_error')
    log_error('bibliographic_file_processor.Search_process_file_error')
  end
  true
end

#process_reference(bb_id, reference) ⇒ `Object`

# File 'lib/bibliographic_file_processor.rb', line 264

# Finds or creates the Record for +reference+ in the given bibliographic
# database and fills in the fields the record does not have yet.
#
# A field is copied from the reference only when the record's current value
# for that field is nil and the reference's value is not the empty string.
# Non-nil values are re-encoded to UTF-8, replacing invalid or undefined
# characters with "_".
#
# When the reference has cited references, their ids are obtained with
# get_cit_refs_ids and attached to the record with update_references.
#
# @param bb_id [Object] id of the bibliographic database of the reference
# @param reference [Object] parsed reference; must respond to uid,
#   cited_references and to every field name listed below
# @return [Record, Result] the record, or a Result carrying the error message
#   when updating the record failed
def process_reference(bb_id, reference)
  reg_o = Record[:uid => reference.uid, :bibliographic_database_id => bb_id]

  if reg_o.nil?
    reg_o_id = Record.insert(:uid => reference.uid, :bibliographic_database_id => bb_id)
    reg_o = Record[reg_o_id]
  end

  fields = [:title, :author, :year, :journal, :volume, :pages, :doi, :journal_abbr, :abstract, :pubmed_id,
            :wos_id, :scopus_id, :scielo_id]

  fields_update = fields.each_with_object({}) do |field, values|
    # Only fill fields that are still empty on the record. The check must use
    # the current field: a literal :field key is never a column, so it was
    # always nil and every field got overwritten.
    next unless reg_o[field].nil?

    value = reference.send(field)
    next if value == ''

    values[field] =
      if value.nil?
        nil
      else
        value.encode("utf-8", invalid: :replace, undef: :replace, replace: '_').force_encoding("utf-8").scrub
      end
  end

  begin
    reg_o.update(fields_update)
  rescue StandardError => e
    # StandardError (not Exception) so interrupts and exits still propagate.
    $log.error(fields_update)
    message = "process_reference error:  #{reference.uid}, #{e.message}"
    $log.error(message)
    result = Result.new
    result.error(message)
    return result
  end

  # Process the references cited by this reference.
  cited_references = reference.cited_references
  unless cited_references.nil?
    cit_refs_ids = get_cit_refs_ids(cited_references)
    reg_o.update_references(cit_refs_ids)
  end
  reg_o
end

Class: BibliographicFileProcessor

Overview

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(search) ⇒ BibliographicFileProcessor

Instance Attribute Details

#canonical_document_processed ⇒ Object (readonly)

#error ⇒ Object (readonly)

#result ⇒ Object (readonly)

#search ⇒ Object (readonly)

Instance Method Details

#create_hash_update(fields, record) ⇒ Object

#get_integrator ⇒ Object

#log_error(message, extra_info = nil) ⇒ Object

#log_success(message, extra_info = nil) ⇒ Object

#process_canonical_documents ⇒ Object

#process_file ⇒ Object

#process_reference(bb_id, reference) ⇒ Object