Coverage for oc_validator / cli.py: 0%

36 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-30 15:46 +0000

1# ISC License 

2# 

3# Copyright (c) 2023-2026, Elia Rizzetto, Silvio Peroni 

4# 

5# Permission to use, copy, modify, and/or distribute this software for any 

6# purpose with or without fee is hereby granted, provided that the above 

7# copyright notice and this permission notice appear in all copies. 

8# 

9# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH 

10# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND 

11# FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, 

12# INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM 

13# LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR 

14# OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 

15# PERFORMANCE OF THIS SOFTWARE. 

16 

17"""Command-line interface for oc_validator.""" 

18 

19from argparse import ArgumentParser 

20from oc_validator.main import Validator, ClosureValidator 

21 

22 

def _positive_gib(value: str) -> int:
    """Parse a ``--map-size`` value as a strictly positive number of GiB.

    Args:
        value: Raw command-line string supplied for ``--map-size``.

    Returns:
        The parsed size as an ``int`` greater than or equal to 1.

    Raises:
        argparse.ArgumentTypeError: If *value* is not an integer or is
            smaller than 1; argparse turns this into a usage error.
    """
    # Imported locally because the module header only imports ArgumentParser.
    from argparse import ArgumentTypeError

    try:
        size = int(value)
    except ValueError:
        # Keep the wording argparse itself uses for a plain ``type=int``.
        raise ArgumentTypeError(f'invalid int value: {value!r}') from None
    if size < 1:
        raise ArgumentTypeError(
            f'map size must be at least 1 GiB, got {size}')
    return size


def add_common_args(parser: ArgumentParser) -> None:
    """Add the arguments shared by both subcommands to *parser*.

    Registers ``--use-lmdb``, ``--map-size``, ``--cache-dir``,
    ``-v/--verbose`` and ``--log-file``.

    Args:
        parser: The (sub)parser that receives the shared options.
    """
    parser.add_argument(
        '--use-lmdb', dest='use_lmdb', action='store_true', default=False,
        help='Enable LMDB for efficient memory usage with large files.')
    # Zero or negative sizes are rejected at parse time: the command handlers
    # multiply this value by 1024 ** 3 to obtain a byte count.
    parser.add_argument(
        '--map-size', dest='map_size', type=_positive_gib, default=1,
        help='LMDB map size in GiB (default: 1).')
    parser.add_argument(
        '--cache-dir', dest='cache_dir', type=str, default=None,
        help='Base directory under which all LMDB caches are created.')
    parser.add_argument(
        '-v', '--verbose', dest='verbose', action='store_true', default=False,
        help='Enable verbose logging output.')
    parser.add_argument(
        '--log-file', dest='log_file', type=str, default=None,
        help='Write logs to this file instead of the terminal.')

40 

41 

def cmd_validate(args) -> None:
    """Validate a single table using the parsed ``validate`` options.

    Args:
        args: Parsed argument namespace. It must provide ``input_csv``,
            ``output_dir``, ``use_meta_endpoint``, ``verify_id_existence``,
            ``use_lmdb``, ``map_size`` (in GiB), ``cache_dir``, ``verbose``
            and ``log_file``.
    """
    bytes_per_gib = 1024 ** 3
    csv_path, out_dir = args.input_csv, args.output_dir
    options = {
        'use_meta_endpoint': args.use_meta_endpoint,
        'verify_id_existence': args.verify_id_existence,
        'use_lmdb': args.use_lmdb,
        # The CLI takes GiB; the validator is handed a byte count.
        'map_size': args.map_size * bytes_per_gib,
        'cache_dir': args.cache_dir,
        'verbose': args.verbose,
        'log_file': args.log_file,
    }
    validator = Validator(csv_path, out_dir, **options)
    validator.validate()

56 

57 

def cmd_closure(args) -> None:
    """Validate a META-CSV + CITS-CSV pair using the ``closure`` options.

    Args:
        args: Parsed argument namespace. It must provide ``meta``,
            ``meta_out``, ``cits``, ``cits_out``, ``strict_sequentiality``,
            ``use_meta_endpoint``, ``verify_id_existence``, ``use_lmdb``,
            ``map_size`` (in GiB), ``cache_dir``, ``verbose`` and
            ``log_file``.
    """
    settings = {
        'meta_in': args.meta,
        'meta_out_dir': args.meta_out,
        'cits_in': args.cits,
        'cits_out_dir': args.cits_out,
        'strict_sequentiality': args.strict_sequentiality,
    }

    id_checks = {
        'use_meta_endpoint': args.use_meta_endpoint,
        'verify_id_existence': args.verify_id_existence,
    }
    # Each table gets its own dict object, as in a literal-per-argument call.
    settings['meta_kwargs'] = dict(id_checks)
    settings['cits_kwargs'] = dict(id_checks)

    settings['use_lmdb'] = args.use_lmdb
    # The CLI takes GiB; the validator is handed a byte count.
    settings['map_size'] = args.map_size * 1024 ** 3
    settings['cache_dir'] = args.cache_dir
    settings['verbose'] = args.verbose
    settings['log_file'] = args.log_file

    closure_validator = ClosureValidator(**settings)
    closure_validator.validate()

81 

82 

def main(argv: 'list[str] | None' = None) -> None:
    """Entry point for the ``oc_validator`` CLI.

    Builds a parser with the ``validate`` and ``closure`` subcommands, parses
    the command line and dispatches to the handler bound to the chosen
    subcommand.

    Args:
        argv: Argument strings to parse. When ``None`` (the default) argparse
            reads ``sys.argv[1:]``, so calling ``main()`` with no arguments
            behaves as a console-script entry point. Passing an explicit list
            lets the CLI be driven programmatically, e.g. from tests.
    """

    def add_id_check_args(subparser: ArgumentParser) -> None:
        """Add the ID-existence flags that both subcommands accept."""
        subparser.add_argument(
            '-m', '--use-meta', dest='use_meta_endpoint', action='store_true',
            help='Use the OC Meta endpoint to check if an ID exists.')
        subparser.add_argument(
            '-s', '--no-id-existence', dest='verify_id_existence',
            action='store_false',
            help='Skip checking if IDs are registered somewhere.')

    parser = ArgumentParser(
        prog='oc_validator',
        description='Validate CSV documents storing citation data and bibliographic '
                    'metadata according to the OpenCitations Data Model.')
    subparsers = parser.add_subparsers(dest='command', required=True)

    # --- validate subcommand ---
    validate_parser = subparsers.add_parser(
        'validate',
        help='Validate a single META-CSV or CITS-CSV document.')
    validate_parser.add_argument(
        '-i', '--input', dest='input_csv', required=True, type=str,
        help='Path to the CSV document to validate.')
    validate_parser.add_argument(
        '-o', '--output', dest='output_dir', required=True, type=str,
        help='Directory where the output JSON-L file will be stored.')
    add_id_check_args(validate_parser)
    add_common_args(validate_parser)
    validate_parser.set_defaults(func=cmd_validate)

    # --- closure subcommand ---
    closure_parser = subparsers.add_parser(
        'closure',
        help='Validate a META-CSV and CITS-CSV pair together, checking transitive closure.')
    closure_parser.add_argument(
        '--meta', required=True, type=str,
        help='Path to the META-CSV file.')
    closure_parser.add_argument(
        '--meta-out', required=True, type=str,
        help='Output directory for META-CSV validation results.')
    closure_parser.add_argument(
        '--cits', required=True, type=str,
        help='Path to the CITS-CSV file.')
    closure_parser.add_argument(
        '--cits-out', required=True, type=str,
        help='Output directory for CITS-CSV validation results.')
    closure_parser.add_argument(
        '--strict-sequentiality', dest='strict_sequentiality',
        action='store_true', default=False,
        help='Skip closure check if individual validations already report errors.')
    add_id_check_args(closure_parser)
    add_common_args(closure_parser)
    closure_parser.set_defaults(func=cmd_closure)

    # ``func`` is set per subcommand via set_defaults above.
    args = parser.parse_args(argv)
    args.func(args)