Coverage for src/api/indexapi_v1.py: 100%
67 statements
« prev ^ index » next coverage.py v7.10.0, created at 2026-04-03 13:54 +0000
« prev ^ index » next coverage.py v7.10.0, created at 2026-04-03 13:54 +0000
1#!/usr/bin/python
2# -*- coding: utf-8 -*-
3# Copyright (c) 2018, Silvio Peroni <essepuntato@gmail.com>
4#
5# Permission to use, copy, modify, and/or distribute this software for any purpose
6# with or without fee is hereby granted, provided that the above copyright notice
7# and this permission notice appear in all copies.
8#
9# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
10# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
11# FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT,
12# OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
13# DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
14# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
15# SOFTWARE.
17__author__ = 'Arcangelo Massari & Ivan Heibi'
19from requests import RequestException, post
20from json import loads
21from indexapi_common import (
22 lower, # noqa: F401 - used by ramose via getattr
23 env_config,
24 get_unique_brs_metadata,
25 get_pub_date,
26 get_source,
27 get_author,
28 get_id_val,
29 cit_journal_sc,
30 cit_author_sc,
31 cit_duration,
32)
def id2omids(s):
    """Resolve an identifier literal to the OMID resources that carry it.

    Queries the SPARQL endpoint configured under
    ``env_config["sparql_endpoint_meta"]`` for every ``?br`` having a
    ``datacite:hasIdentifier`` whose literal value equals *s* (matched both
    as a plain literal and as an ``xsd:string`` typed literal).

    Args:
        s: the identifier literal value to look up.

    Returns:
        A 1-tuple. Its single item is ``""`` when the request fails or no
        resource matches; otherwise it is a list of strings, each being a
        space-separated run of at most ``MULTI_VAL_MAX``
        ``<https://w3id.org/oc/meta/br/...>`` IRIs.
    """
    MULTI_VAL_MAX = 9000
    sparql_endpoint = env_config["sparql_endpoint_meta"]

    # The value is embedded in single-quoted SPARQL string literals, so the
    # characters that cannot appear raw there (backslash, single quote, line
    # breaks) are escaped. Backslash goes first so the escapes added
    # afterwards are not themselves doubled.
    literal = (
        s.replace("\\", "\\\\")
        .replace("'", "\\'")
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )

    sparql_query = """
    PREFIX datacite: <http://purl.org/spar/datacite/>
    PREFIX literal: <http://www.essepuntato.it/2010/06/literalreification/>
    SELECT ?br {
        { ?identifier literal:hasLiteralValue '""" + literal + """'^^<http://www.w3.org/2001/XMLSchema#string>. }
        UNION
        { ?identifier literal:hasLiteralValue '""" + literal + """'. }
        ?br datacite:hasIdentifier ?identifier
    }
    """

    headers = {
        "Accept": "application/sparql-results+json",
        "Content-Type": "application/sparql-query",
    }
    try:
        response = post(sparql_endpoint, headers=headers, data=sparql_query, timeout=45)
        response.raise_for_status()
    except RequestException:
        # Any transport or HTTP failure is reported as "no match".
        return "",

    r = loads(response.text)
    results = r["results"]["bindings"]
    # Keep only the part of each resource IRI that follows "meta/br/".
    omid_l = [elem["br"]["value"].split("meta/br/")[1] for elem in results]

    if not omid_l:
        return "",

    # Emit the IRIs in chunks of at most MULTI_VAL_MAX entries each.
    sparql_values = [
        " ".join(
            "<https://w3id.org/oc/meta/br/" + e + ">"
            for e in omid_l[i:i + MULTI_VAL_MAX]
        )
        for i in range(0, len(omid_l), MULTI_VAL_MAX)
    ]
    return sparql_values,
def count_unique_cits(res, *args):
    """Count the distinct (citing, cited) pairs described by a result table.

    Args:
        res: a table whose first row is the header; in every following row
            the citing and cited cells are iterables of identifiers.
        *args: ``args[1]`` and ``args[2]`` are the header names of the
            citing and cited columns respectively. ``args[0]`` is not read.

    Returns:
        The tuple ``([["count"], [n]], True)``, where ``n`` is the number of
        unique citing resources multiplied by the number of unique cited
        resources, as resolved by ``get_unique_brs_metadata``. ``n`` is 0
        when the table has no data rows.

    Raises:
        ValueError: if either column name is missing from the header.
    """
    header = res[0]
    citing_idx = header.index(args[1])
    cited_idx = header.index(args[2])

    count = 0
    if len(res) > 1:
        citing_ids = set()
        cited_ids = set()
        for row in res[1:]:
            citing_ids.update(row[citing_idx])
            cited_ids.update(row[cited_idx])

        citing_meta = get_unique_brs_metadata(list(citing_ids), ids_only=True)
        cited_meta = get_unique_brs_metadata(list(cited_ids), ids_only=True)

        # Every citing key pairs with every cited key and keys are unique, so
        # the number of distinct pairs is the product of the two sizes; there
        # is no need to materialise the pairs themselves.
        count = len(citing_meta) * len(cited_meta)

    return [["count"], [count]], True
89# args must contain the <citing> and <cited>
def citations_info(res, *args):
    """Build one citation record per (citing, cited) pair of a result table.

    Args:
        res: a table whose first row is the header; in every following row
            the citing and cited cells are iterables of identifiers.
        *args: ``args[1]`` and ``args[2]`` are the header names of the
            citing and cited columns respectively. ``args[0]`` is not read.

    Returns:
        The tuple ``(table, True)``. ``table`` starts with the header
        ``["oci", "citing", "cited", "creation", "timespan", "journal_sc",
        "author_sc"]`` followed by one row for each combination of a unique
        citing resource and a unique cited resource, as resolved by
        ``get_unique_brs_metadata``. Only the header is returned when the
        input has no data rows.

    Raises:
        ValueError: if either column name is missing from the header.
    """
    header = res[0]
    citing_idx = header.index(args[1])
    cited_idx = header.index(args[2])

    f_res = [
        ["oci", "citing", "cited", "creation", "timespan", "journal_sc", "author_sc"]
    ]

    if len(res) > 1:
        citing_to_dedup = []
        cited_to_dedup = []
        for row in res[1:]:
            citing_to_dedup.extend(row[citing_idx])
            cited_to_dedup.extend(row[cited_idx])

        citing_to_dedup_meta = get_unique_brs_metadata(list(set(citing_to_dedup)))
        cited_to_dedup_meta = get_unique_brs_metadata(list(set(cited_to_dedup)))

        for citing_entity, _citing in citing_to_dedup_meta.items():
            # Citing-side values do not depend on the cited entity, so they
            # are looked up once per citing entity rather than once per pair.
            # NOTE(review): assumes the imported getters have no side effects.
            citing_id = get_id_val(citing_entity)
            citing_doi = __get_doi(_citing)
            citing_date = get_pub_date(_citing)
            citing_source = get_source(_citing)
            citing_author = get_author(_citing)

            for cited_entity, _cited in cited_to_dedup_meta.items():
                res_row = [
                    # oci value
                    citing_id + "-" + get_id_val(cited_entity),
                    # citing
                    citing_doi,
                    # cited
                    __get_doi(_cited),
                    # creation = citing[pub_date]
                    citing_date,
                    # timespan = citing[pub_date] - cited[pub_date]
                    cit_duration(citing_date, get_pub_date(_cited)),
                    # journal_sc = compare citing[source] and cited[source]
                    cit_journal_sc(citing_source, get_source(_cited)),
                    # author_sc = compare citing[author] and cited[author]
                    cit_author_sc(citing_author, get_author(_cited)),
                ]
                f_res.append(res_row)

    return f_res, True
def __get_doi(elem):
    """Return the DOI values found in ``elem["ids"]``, joined by spaces.

    Args:
        elem: a mapping that may hold an ``"ids"`` string in which the
            individual identifiers are separated by ``" __ "``.

    Returns:
        The identifiers that start with ``"doi:"``, each with that leading
        prefix removed, joined by a single space in their original order.
        An empty string when ``"ids"`` is absent or no identifier is a DOI.
    """
    if "ids" not in elem:
        return ""

    prefix = "doi:"
    # Slice off only the leading prefix: splitting on "doi:" would cut short
    # any value that happens to contain that substring again.
    return " ".join(
        identifier[len(prefix):]
        for identifier in elem["ids"].split(" __ ")
        if identifier.startswith(prefix)
    )