Coverage for oc_validator / cli.py: 0%
36 statements
coverage.py v7.13.5, created at 2026-04-30 15:46 +0000
1# ISC License
2#
3# Copyright (c) 2023-2026, Elia Rizzetto, Silvio Peroni
4#
5# Permission to use, copy, modify, and/or distribute this software for any
6# purpose with or without fee is hereby granted, provided that the above
7# copyright notice and this permission notice appear in all copies.
8#
9# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
10# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
11# FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
12# INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13# LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
14# OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15# PERFORMANCE OF THIS SOFTWARE.
17"""Command-line interface for oc_validator."""
19from argparse import ArgumentParser
20from oc_validator.main import Validator, ClosureValidator
def add_common_args(parser: ArgumentParser) -> None:
    """Register on *parser* the options that both subcommands accept.

    The options cover LMDB usage (``--use-lmdb``, ``--map-size``,
    ``--cache-dir``) and logging (``-v``/``--verbose``, ``--log-file``).
    They are added in the order listed below, which is also the order in
    which argparse shows them in ``--help``.
    """
    # Each entry is (option strings, keyword arguments for add_argument).
    shared_options = (
        (('--use-lmdb',), {
            'dest': 'use_lmdb',
            'action': 'store_true',
            'default': False,
            'help': 'Enable LMDB for efficient memory usage with large files.',
        }),
        (('--map-size',), {
            'dest': 'map_size',
            'type': int,
            'default': 1,
            'help': 'LMDB map size in GiB (default: 1).',
        }),
        (('--cache-dir',), {
            'dest': 'cache_dir',
            'type': str,
            'default': None,
            'help': 'Base directory under which all LMDB caches are created.',
        }),
        (('-v', '--verbose'), {
            'dest': 'verbose',
            'action': 'store_true',
            'default': False,
            'help': 'Enable verbose logging output.',
        }),
        (('--log-file',), {
            'dest': 'log_file',
            'type': str,
            'default': None,
            'help': 'Write logs to this file instead of the terminal.',
        }),
    )
    for option_strings, settings in shared_options:
        parser.add_argument(*option_strings, **settings)
def cmd_validate(args) -> None:
    """Handle the ``validate`` subcommand: validate a single table.

    *args* is the parsed argparse namespace. Its values are forwarded to
    ``Validator``, whose ``validate()`` method is then called.
    """
    source_csv = args.input_csv
    destination_dir = args.output_dir
    bytes_per_gib = 1024 ** 3
    options = {
        'use_meta_endpoint': args.use_meta_endpoint,
        'verify_id_existence': args.verify_id_existence,
        'use_lmdb': args.use_lmdb,
        # --map-size is given in GiB on the command line; scale it to bytes.
        'map_size': args.map_size * bytes_per_gib,
        'cache_dir': args.cache_dir,
        'verbose': args.verbose,
        'log_file': args.log_file,
    }
    validator = Validator(source_csv, destination_dir, **options)
    validator.validate()
def cmd_closure(args) -> None:
    """Handle the ``closure`` subcommand for a META-CSV + CITS-CSV pair.

    *args* is the parsed argparse namespace. Its values are forwarded to
    ``ClosureValidator``, whose ``validate()`` method is then called. The
    same ID-check settings are passed for both the META and the CITS table.
    """
    id_check_options = {
        'use_meta_endpoint': args.use_meta_endpoint,
        'verify_id_existence': args.verify_id_existence,
    }
    bytes_per_gib = 1024 ** 3
    closure_validator = ClosureValidator(
        meta_in=args.meta,
        meta_out_dir=args.meta_out,
        cits_in=args.cits,
        cits_out_dir=args.cits_out,
        strict_sequentiality=args.strict_sequentiality,
        # Separate copies, so each table gets its own dict object.
        meta_kwargs=dict(id_check_options),
        cits_kwargs=dict(id_check_options),
        use_lmdb=args.use_lmdb,
        # --map-size is given in GiB on the command line; scale it to bytes.
        map_size=args.map_size * bytes_per_gib,
        cache_dir=args.cache_dir,
        verbose=args.verbose,
        log_file=args.log_file,
    )
    closure_validator.validate()
def _add_id_check_args(parser: ArgumentParser) -> None:
    """Add the ID-lookup switches shared by ``validate`` and ``closure``."""
    parser.add_argument(
        '-m', '--use-meta', dest='use_meta_endpoint', action='store_true',
        help='Use the OC Meta endpoint to check if an ID exists.')
    parser.add_argument(
        '-s', '--no-id-existence', dest='verify_id_existence', action='store_false',
        help='Skip checking if IDs are registered somewhere.')


def main(argv=None) -> None:
    """Entry point for the ``oc_validator`` CLI.

    Builds a parser with two required subcommands, ``validate`` and
    ``closure``, parses the command line and dispatches to the handler
    registered for the chosen subcommand.

    :param argv: Optional list of argument strings to parse. When ``None``
        (the default) argparse reads ``sys.argv[1:]``, so calling ``main()``
        with no arguments behaves as before. Passing a list allows the CLI
        to be invoked programmatically.
    """
    parser = ArgumentParser(
        prog='oc_validator',
        description='Validate CSV documents storing citation data and bibliographic '
                    'metadata according to the OpenCitations Data Model.')
    subparsers = parser.add_subparsers(dest='command', required=True)

    # --- validate subcommand ---
    validate_parser = subparsers.add_parser(
        'validate',
        help='Validate a single META-CSV or CITS-CSV document.')
    validate_parser.add_argument(
        '-i', '--input', dest='input_csv', required=True, type=str,
        help='Path to the CSV document to validate.')
    validate_parser.add_argument(
        '-o', '--output', dest='output_dir', required=True, type=str,
        help='Directory where the output JSON-L file will be stored.')
    _add_id_check_args(validate_parser)
    add_common_args(validate_parser)
    validate_parser.set_defaults(func=cmd_validate)

    # --- closure subcommand ---
    closure_parser = subparsers.add_parser(
        'closure',
        help='Validate a META-CSV and CITS-CSV pair together, checking transitive closure.')
    closure_parser.add_argument(
        '--meta', required=True, type=str,
        help='Path to the META-CSV file.')
    closure_parser.add_argument(
        '--meta-out', required=True, type=str,
        help='Output directory for META-CSV validation results.')
    closure_parser.add_argument(
        '--cits', required=True, type=str,
        help='Path to the CITS-CSV file.')
    closure_parser.add_argument(
        '--cits-out', required=True, type=str,
        help='Output directory for CITS-CSV validation results.')
    closure_parser.add_argument(
        '--strict-sequentiality', dest='strict_sequentiality', action='store_true', default=False,
        help='Skip closure check if individual validations already report errors.')
    _add_id_check_args(closure_parser)
    add_common_args(closure_parser)
    closure_parser.set_defaults(func=cmd_closure)

    # Each subparser stored its handler under ``func`` via set_defaults().
    args = parser.parse_args(argv)
    args.func(args)