Coverage for src/api/indexapi_v1.py: 100%

67 statements  

« prev     ^ index     » next       coverage.py v7.10.0, created at 2026-04-03 13:54 +0000

1#!/usr/bin/python 

2# -*- coding: utf-8 -*- 

3# Copyright (c) 2018, Silvio Peroni <essepuntato@gmail.com> 

4# 

5# Permission to use, copy, modify, and/or distribute this software for any purpose 

6# with or without fee is hereby granted, provided that the above copyright notice 

7# and this permission notice appear in all copies. 

8# 

9# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH 

10# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND 

11# FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, 

12# OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, 

13# DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 

14# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS 

15# SOFTWARE. 

16 

17__author__ = 'Arcangelo Massari & Ivan Heibi' 

18 

19from requests import RequestException, post 

20from json import loads 

21from indexapi_common import ( 

22 lower, # noqa: F401 - used by ramose via getattr 

23 env_config, 

24 get_unique_brs_metadata, 

25 get_pub_date, 

26 get_source, 

27 get_author, 

28 get_id_val, 

29 cit_journal_sc, 

30 cit_author_sc, 

31 cit_duration, 

32) 

33 

34 

def id2omids(s):
    """Resolve an identifier literal to the bibliographic resources carrying it.

    Sends a SPARQL query to the endpoint configured under
    ``env_config["sparql_endpoint_meta"]`` asking for every ``?br`` linked via
    ``datacite:hasIdentifier`` to an identifier node whose literal value is
    ``s`` (matched both as an ``xsd:string`` typed literal and as a plain
    literal).

    Args:
        s: The identifier literal to look up. It is escaped before being
            embedded in the query, so quotes, backslashes and line breaks in
            the value are matched literally instead of altering the query.

    Returns:
        A one-element tuple.  ``("",)`` when the HTTP request fails or no
        resource matches; otherwise ``(chunks,)`` where ``chunks`` is a list
        of strings, each holding at most ``MULTI_VAL_MAX`` IRIs of the form
        ``<https://w3id.org/oc/meta/br/...>`` separated by single spaces
        (presumably meant for a SPARQL ``VALUES`` clause - confirm against
        the caller).
    """
    MULTI_VAL_MAX = 9000
    sparql_endpoint = env_config["sparql_endpoint_meta"]

    # The value ends up inside a single-quoted SPARQL literal: escape the
    # characters that are not allowed to appear raw there (backslash first,
    # so the escapes added afterwards are not escaped a second time).
    escaped = (
        s.replace("\\", "\\\\")
        .replace("'", "\\'")
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )

    sparql_query = """
        PREFIX datacite: <http://purl.org/spar/datacite/>
        PREFIX literal: <http://www.essepuntato.it/2010/06/literalreification/>
        SELECT ?br {
            { ?identifier literal:hasLiteralValue '""" + escaped + """'^^<http://www.w3.org/2001/XMLSchema#string>. }
            UNION
            { ?identifier literal:hasLiteralValue '""" + escaped + """'. }
            ?br datacite:hasIdentifier ?identifier
        }
    """

    headers = {
        "Accept": "application/sparql-results+json",
        "Content-Type": "application/sparql-query",
    }
    try:
        response = post(sparql_endpoint, headers=headers, data=sparql_query, timeout=45)
        response.raise_for_status()
    except RequestException:
        # Network errors and HTTP error statuses are reported as "no match".
        return "",

    r = loads(response.text)
    results = r["results"]["bindings"]
    # Keep only the part of each resource IRI that follows "meta/br/".
    omid_l = [elem["br"]["value"].split("meta/br/")[1] for elem in results]

    if not omid_l:
        return "",

    # Split the IRIs into chunks of at most MULTI_VAL_MAX entries each.
    sparql_values = [
        " ".join(
            "<https://w3id.org/oc/meta/br/" + e + ">"
            for e in omid_l[i:i + MULTI_VAL_MAX]
        )
        for i in range(0, len(omid_l), MULTI_VAL_MAX)
    ]
    return sparql_values,

67 

def count_unique_cits(res, *args):
    """Count the citing/cited pairs left after de-duplicating both sides.

    Args:
        res: Result table as a list of rows; ``res[0]`` is the header row and
            every following row holds, in the citing and cited columns, an
            iterable of entities that is flattened with ``list.extend``.
        *args: ``args[1]`` and ``args[2]`` are the header names of the citing
            and the cited column respectively.  ``args[0]`` is not used here.

    Returns:
        The tuple ``([["count"], [n]], True)`` where ``n`` is the number of
        (citing, cited) combinations of the de-duplicated entities, or ``0``
        when ``res`` holds only the header.  The second element is always
        ``True``; its meaning is defined by the caller.

    Raises:
        ValueError: If one of the two column names is not in the header.
    """
    header = res[0]
    citing_idx = header.index(args[1])
    cited_idx = header.index(args[2])

    count = 0
    if len(res) > 1:
        citing_to_dedup = []
        cited_to_dedup = []
        for row in res[1:]:
            citing_to_dedup.extend(row[citing_idx])
            cited_to_dedup.extend(row[cited_idx])

        citing_meta = get_unique_brs_metadata(list(set(citing_to_dedup)), ids_only=True)
        cited_meta = get_unique_brs_metadata(list(set(cited_to_dedup)), ids_only=True)

        # Keys of a mapping are unique, so the number of distinct
        # (citing, cited) pairs is the product of the two sizes; building the
        # whole Cartesian product in a set just to measure it is unnecessary.
        count = len(citing_meta) * len(cited_meta)

    return [["count"], [count]], True

88 

89# args must contain the <citing> and <cited> 

def citations_info(res, *args):
    """Build one citation record per (citing, cited) pair of unique entities.

    Args:
        res: Result table as a list of rows; ``res[0]`` is the header row and
            every following row holds, in the citing and cited columns, an
            iterable of entities that is flattened with ``list.extend``.
        *args: ``args[1]`` and ``args[2]`` are the header names of the citing
            and the cited column respectively.  ``args[0]`` is not used here.

    Returns:
        The tuple ``(table, True)``.  ``table`` starts with the header
        ``["oci", "citing", "cited", "creation", "timespan", "journal_sc",
        "author_sc"]`` followed by one row per pair of entities returned by
        ``get_unique_brs_metadata``; it contains only the header when ``res``
        has no data rows.  The second element is always ``True``; its meaning
        is defined by the caller.

    Raises:
        ValueError: If one of the two column names is not in the header.
    """
    header = res[0]
    citing_idx = header.index(args[1])
    cited_idx = header.index(args[2])

    f_res = [
        ["oci", "citing", "cited", "creation", "timespan", "journal_sc", "author_sc"]
    ]

    if len(res) > 1:
        citing_to_dedup = []
        cited_to_dedup = []
        for row in res[1:]:
            citing_to_dedup.extend(row[citing_idx])
            cited_to_dedup.extend(row[cited_idx])

        citing_meta = get_unique_brs_metadata(list(set(citing_to_dedup)))
        cited_meta = get_unique_brs_metadata(list(set(cited_to_dedup)))

        # Nothing to pair up unless both sides have at least one entity.
        if citing_meta and cited_meta:
            # The OCI fragment and the DOI string depend on a single entity,
            # so they are computed once per entity rather than once per pair.
            cited_static = [
                (meta, get_id_val(key), __get_doi(meta))
                for key, meta in cited_meta.items()
            ]

            for citing_key, citing in citing_meta.items():
                citing_id = get_id_val(citing_key)
                citing_doi = __get_doi(citing)

                for cited, cited_id, cited_doi in cited_static:
                    # NOTE(review): the remaining helpers are still called per
                    # pair, as before, because their purity cannot be
                    # confirmed from this file.
                    f_res.append([
                        # oci value
                        citing_id + "-" + cited_id,
                        # citing
                        citing_doi,
                        # cited
                        cited_doi,
                        # creation = citing[pub_date]
                        get_pub_date(citing),
                        # timespan = citing[pub_date] - cited[pub_date]
                        cit_duration(get_pub_date(citing), get_pub_date(cited)),
                        # journal_sc = compare the sources of citing and cited
                        cit_journal_sc(get_source(citing), get_source(cited)),
                        # author_sc = compare the authors of citing and cited
                        cit_author_sc(get_author(citing), get_author(cited)),
                    ])

    return f_res, True

136 

137 

def __get_doi(elem):
    """Return the DOIs listed in ``elem["ids"]`` as one space-separated string.

    Args:
        elem: Mapping that may hold an ``"ids"`` string made of identifiers
            separated by ``" __ "``.

    Returns:
        The identifiers starting with ``"doi:"``, each with that prefix
        removed, joined by single spaces in their original order.  An empty
        string when ``"ids"`` is missing or contains no DOI.
    """
    if "ids" not in elem:
        return ""

    prefix = "doi:"
    # Slice the prefix off instead of splitting on it: a DOI whose suffix
    # happens to contain "doi:" again must be kept whole.
    return " ".join(
        identifier[len(prefix):]
        for identifier in elem["ids"].split(" __ ")
        if identifier.startswith(prefix)
    )