Class: PMC::DoiToPmidProcessor

Inherits:
Object
  • Object
show all
Defined in:
lib/pmc/doi_to_pmid_processor.rb

Overview

Gets the PMID for each DOI in a list, using the NCBI ID Converter API (https://www.ncbi.nlm.nih.gov/pmc/tools/id-converter-api/).

Constant Summary collapse

# Endpoint of the NCBI ID Converter API that requests are sent to.
BASE_URL = "https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/".freeze
# Maximum number of DOIs submitted in a single request.
MAX_SLICE = 150
# Value sent as the +tool+ query parameter of every request.
TOOL = "buhos".freeze
# Value sent as the +email+ query parameter of every request.
EMAIL = "clbustos.2@gmail.com".freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(doi_list) ⇒ DoiToPmidProcessor

Returns a new instance of DoiToPmidProcessor.



46
47
48
49
50
# File 'lib/pmc/doi_to_pmid_processor.rb', line 46

# Sets up a processor for the given DOIs with empty result containers.
#
# @param doi_list [Array<String>] DOIs to resolve; stored as given, not copied
def initialize(doi_list)
  # Results start empty; they are filled in while slices are processed.
  @doi_as_pmid = {}
  @doi_list = doi_list
  @doi_bad = []
end

Instance Attribute Details

#doi_as_pmidObject (readonly)

Returns the value of attribute doi_as_pmid.



44
45
46
# File 'lib/pmc/doi_to_pmid_processor.rb', line 44

# DOI-to-PMID mapping collected so far.
#
# Starts as an empty hash; #process_doi_slice stores each returned record's
# "pmid" value under that record's "doi" value.
#
# @return [Hash] the current value of @doi_as_pmid
def doi_as_pmid
  @doi_as_pmid
end

#doi_badObject (readonly)

Returns the value of attribute doi_bad.



45
46
47
# File 'lib/pmc/doi_to_pmid_processor.rb', line 45

# Identifiers that the API reported as unknown or as not matching the DOI type.
#
# Starts as an empty array; #process_doi_slice appends the identifier quoted
# in the API error message and leaves listed identifiers out of later requests.
#
# @return [Array] the current value of @doi_bad
def doi_bad
  @doi_bad
end

#doi_listObject (readonly)

Returns the value of attribute doi_list.



43
44
45
# File 'lib/pmc/doi_to_pmid_processor.rb', line 43

# The DOI list this processor was constructed with.
#
# @return [Object] the +doi_list+ argument given to #initialize
def doi_list
  @doi_list
end

Instance Method Details

#processObject

NCBI asks users to request no more than 200 IDs per call, so the DOI list is split into slices of at most MAX_SLICE IDs and one request is made per slice.



53
54
55
56
57
58
59
60
# File 'lib/pmc/doi_to_pmid_processor.rb', line 53

# Resolves every DOI in @doi_list, submitting at most MAX_SLICE DOIs per request.
#
# A slice is submitted again while #process_doi_slice returns anything other
# than +:ok+. The number of attempts per slice is bounded so that a response
# that never succeeds cannot keep this method calling the API forever.
#
# @param max_errors [Integer] extra attempts allowed per slice on top of one
#   attempt per DOI in the slice
# @raise [IDConverterApiResponseError] when a slice has not succeeded after
#   its attempt budget is used up
def process(max_errors: 5)
  @doi_list.each_slice(MAX_SLICE) do |slice_doi|
    # One attempt for each DOI that could be rejected, plus max_errors spare
    # attempts. Computed before the first call so that later changes to the
    # slice do not alter the budget.
    max_attempts = slice_doi.size + max_errors
    attempts = 0
    until process_doi_slice(slice_doi) == :ok
      attempts += 1
      next if attempts < max_attempts

      raise IDConverterApiResponseError,
            "No successful response for DOI slice after #{max_attempts} attempts"
    end
  end
end

#process_doi_slice(slice_doi) ⇒ Object



61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# File 'lib/pmc/doi_to_pmid_processor.rb', line 61

# Resolves one slice of DOIs through the ID Converter API.
#
# DOIs already listed in @doi_bad are left out of the request. On success,
# each returned record's "pmid" is stored in @doi_as_pmid under its "doi".
#
# @param slice_doi [Array<String>] DOIs to resolve; the array is not modified
# @return [Symbol] +:ok+ when the records were stored (or nothing was left to
#   request), +:retry+ when the API rejected one identifier and it was added
#   to @doi_bad, +:error+ when a 200 response carried no usable JSON object
# @raise [IDConverterApiResponseError] on a non-200 response that does not
#   name a rejected identifier, or on a 200 response whose status is not "ok"
def process_doi_slice(slice_doi)
  pending = slice_doi.reject { |doi| @doi_bad.include?(doi) }
  # Every DOI in the slice was already rejected: nothing to request.
  return :ok if pending.empty?

  slice_doi_url = pending.map { |doi| CGI.escape(doi) }.join(",")
  url = "#{BASE_URL}?tool=#{TOOL}&email=#{EMAIL}#{PMC.api_key}&idtype=doi&format=json&versions=no&ids=#{slice_doi_url}"
  res = Net::HTTP.get_response(URI(url))

  json = begin
    parsed = JSON.parse(res.body)
    # Only a JSON object can carry the "status"/"message"/"records" keys read below.
    parsed.is_a?(Hash) ? parsed : nil
  rescue JSON::ParserError, TypeError
    # TypeError covers a missing (nil) body.
    nil
  end

  if res.code != "200"
    # The error body may lack "message"; treat that as "no rejected identifier".
    message = json && json["message"] ? CGI.unescape(json["message"].to_s) : nil
    rejected = message && (message[/ID type of '(.+?)' is unknown/, 1] ||
                           message[/ID type 'doi' mismatch for '(.+?)'/, 1])
    if rejected
      @doi_bad.push(rejected)
      return :retry
    end

    raise IDConverterApiResponseError, "Can't retrieve information for slice #{slice_doi_url}. CODE: #{res.code}, Body:#{res.body}"
  end

  return :error unless json
  raise IDConverterApiResponseError, "Error on JSON retrieval #{res.body}" if json["status"] != "ok"

  # Array() tolerates an "ok" response that has no "records" key.
  Array(json["records"]).each do |record|
    @doi_as_pmid[record["doi"]] = record["pmid"]
  end
  :ok
end