summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Andrey Kislyuk <kislyuk@gmail.com>, 2023-04-22 08:06:31 -0700
committer: Andrey Kislyuk <kislyuk@gmail.com>, 2023-04-22 08:16:57 -0700
commit: 4b3465530e043a38b8ea4ace20d52ef4e861ad5b (patch)
tree: a9cb4da4a7632da5d5fb653b1507c89d57c4b8db
parent: 65f530ab8076aca7571e3964905c82674fd57825 (diff)
Enable XML descent for streaming
-rw-r--r-- yq/__init__.py | 23
-rw-r--r-- yq/parser.py | 4
2 files changed, 21 insertions, 6 deletions
diff --git a/yq/__init__.py b/yq/__init__.py
index ad27db6..927e975 100644
--- a/yq/__init__.py
+++ b/yq/__init__.py
@@ -183,6 +183,7 @@ def yq(
width=None,
indentless_lists=False,
xml_root=None,
+ xml_item_depth=0,
xml_dtd=False,
xml_force_list=frozenset(),
explicit_start=False,
@@ -242,7 +243,10 @@ def yq(
elif input_format == "xml":
import xmltodict
- doc = xmltodict.parse(input_stream.read(), disable_entities=True, force_list=xml_force_list)
+ if xml_item_depth != 0:
+ raise Exception("xml_item_depth is not supported with xq -x")
+
+ doc = xmltodict.parse(input_stream.buffer, disable_entities=True, force_list=xml_force_list)
json.dump(doc, json_buffer, cls=JSONDateTimeEncoder)
json_buffer.write("\n")
elif input_format == "toml":
@@ -320,12 +324,21 @@ def yq(
elif input_format == "xml":
import xmltodict
+ def emit_entry(path, entry):
+ json.dump(entry, jq.stdin)
+ jq.stdin.write("\n") # type: ignore
+ return True
+
for input_stream in input_streams:
- json.dump(
- xmltodict.parse(input_stream.read(), disable_entities=True, force_list=xml_force_list),
- jq.stdin, # type: ignore
+ doc = xmltodict.parse(
+ input_stream.buffer,
+ disable_entities=True,
+ force_list=xml_force_list,
+ item_depth=xml_item_depth,
+ item_callback=emit_entry,
)
- jq.stdin.write("\n") # type: ignore
+ if doc:
+ emit_entry(None, doc)
elif input_format == "toml":
import tomlkit
diff --git a/yq/parser.py b/yq/parser.py
index 9d796dc..560cb6f 100644
--- a/yq/parser.py
+++ b/yq/parser.py
@@ -38,7 +38,7 @@ class Parser(argparse.ArgumentParser):
def get_parser(program_name, description):
# By default suppress these help strings and only enable them in the specific programs.
yaml_output_help, yaml_roundtrip_help, width_help, indentless_help, grammar_help = [argparse.SUPPRESS] * 5
- xml_output_help, xml_dtd_help, xml_root_help, xml_force_list_help = [argparse.SUPPRESS] * 4
+ xml_output_help, xml_item_depth_help, xml_dtd_help, xml_root_help, xml_force_list_help = [argparse.SUPPRESS] * 5
toml_output_help = argparse.SUPPRESS
if program_name == "yq":
@@ -60,6 +60,7 @@ def get_parser(program_name, description):
elif program_name == "xq":
current_language = "XML"
xml_output_help = "Transcode jq JSON output back into XML and emit it"
+ xml_item_depth_help = "Specify depth of items to emit from document (use this to stream large documents)"
xml_dtd_help = "Preserve XML Document Type Definition (disables streaming of multiple docs)"
xml_root_help = "When transcoding back to XML, envelope the output in an element with this name"
xml_force_list_help = (
@@ -107,6 +108,7 @@ def get_parser(program_name, description):
parser.add_argument(
"--xml-output", "-x", dest="output_format", action="store_const", const="xml", help=xml_output_help
)
+ parser.add_argument("--xml-item-depth", type=int, default=0, help=xml_item_depth_help)
parser.add_argument("--xml-dtd", action="store_true", help=xml_dtd_help)
parser.add_argument("--xml-root", help=xml_root_help)
parser.add_argument("--xml-force-list", action="append", help=xml_force_list_help)