diff options
author | Andrey Kislyuk <kislyuk@gmail.com> | 2023-04-22 08:06:31 -0700 |
---|---|---|
committer | Andrey Kislyuk <kislyuk@gmail.com> | 2023-04-22 08:16:57 -0700 |
commit | 4b3465530e043a38b8ea4ace20d52ef4e861ad5b (patch) | |
tree | a9cb4da4a7632da5d5fb653b1507c89d57c4b8db | |
parent | 65f530ab8076aca7571e3964905c82674fd57825 (diff) |
Enable XML descent for streaming
-rw-r--r-- | yq/__init__.py | 23 | ||||
-rw-r--r-- | yq/parser.py | 4 |
2 files changed, 21 insertions, 6 deletions
```diff
diff --git a/yq/__init__.py b/yq/__init__.py
index ad27db6..927e975 100644
--- a/yq/__init__.py
+++ b/yq/__init__.py
@@ -183,6 +183,7 @@ def yq(
     width=None,
     indentless_lists=False,
     xml_root=None,
+    xml_item_depth=0,
     xml_dtd=False,
     xml_force_list=frozenset(),
     explicit_start=False,
@@ -242,7 +243,10 @@ def yq(
                 elif input_format == "xml":
                     import xmltodict

-                    doc = xmltodict.parse(input_stream.read(), disable_entities=True, force_list=xml_force_list)
+                    if xml_item_depth != 0:
+                        raise Exception("xml_item_depth is not supported with xq -x")
+
+                    doc = xmltodict.parse(input_stream.buffer, disable_entities=True, force_list=xml_force_list)
                     json.dump(doc, json_buffer, cls=JSONDateTimeEncoder)
                     json_buffer.write("\n")
                 elif input_format == "toml":
@@ -320,12 +324,21 @@ def yq(
             elif input_format == "xml":
                 import xmltodict

+                def emit_entry(path, entry):
+                    json.dump(entry, jq.stdin)
+                    jq.stdin.write("\n")  # type: ignore
+                    return True
+
                 for input_stream in input_streams:
-                    json.dump(
-                        xmltodict.parse(input_stream.read(), disable_entities=True, force_list=xml_force_list),
-                        jq.stdin,  # type: ignore
+                    doc = xmltodict.parse(
+                        input_stream.buffer,
+                        disable_entities=True,
+                        force_list=xml_force_list,
+                        item_depth=xml_item_depth,
+                        item_callback=emit_entry,
                     )
-                    jq.stdin.write("\n")  # type: ignore
+                    if doc:
+                        emit_entry(None, doc)
             elif input_format == "toml":
                 import tomlkit

diff --git a/yq/parser.py b/yq/parser.py
index 9d796dc..560cb6f 100644
--- a/yq/parser.py
+++ b/yq/parser.py
@@ -38,7 +38,7 @@ class Parser(argparse.ArgumentParser):
 def get_parser(program_name, description):
     # By default suppress these help strings and only enable them in the specific programs.
     yaml_output_help, yaml_roundtrip_help, width_help, indentless_help, grammar_help = [argparse.SUPPRESS] * 5
-    xml_output_help, xml_dtd_help, xml_root_help, xml_force_list_help = [argparse.SUPPRESS] * 4
+    xml_output_help, xml_item_depth_help, xml_dtd_help, xml_root_help, xml_force_list_help = [argparse.SUPPRESS] * 5
     toml_output_help = argparse.SUPPRESS

     if program_name == "yq":
@@ -60,6 +60,7 @@ def get_parser(program_name, description):
     elif program_name == "xq":
         current_language = "XML"
         xml_output_help = "Transcode jq JSON output back into XML and emit it"
+        xml_item_depth_help = "Specify depth of items to emit from document (use this to stream large documents)"
         xml_dtd_help = "Preserve XML Document Type Definition (disables streaming of multiple docs)"
         xml_root_help = "When transcoding back to XML, envelope the output in an element with this name"
         xml_force_list_help = (
@@ -107,6 +108,7 @@ def get_parser(program_name, description):
     parser.add_argument(
         "--xml-output", "-x", dest="output_format", action="store_const", const="xml", help=xml_output_help
     )
+    parser.add_argument("--xml-item-depth", type=int, default=0, help=xml_item_depth_help)
     parser.add_argument("--xml-dtd", action="store_true", help=xml_dtd_help)
     parser.add_argument("--xml-root", help=xml_root_help)
     parser.add_argument("--xml-force-list", action="append", help=xml_force_list_help)
```