diff options
author | Andrey Kislyuk <kislyuk@gmail.com> | 2019-11-03 14:19:01 -0800 |
---|---|---|
committer | Andrey Kislyuk <kislyuk@gmail.com> | 2019-11-04 10:15:08 -0800 |
commit | 7ee1a4e4b3a971618c201024adb3f0aad10cb2db (patch) | |
tree | b889d303ab673befecd4438612d34980aaee6de5 | |
parent | 1eb5432eb3a85ede1c8f1fab9c69b2b91f5d05fd (diff) |
Add -Y/--yaml-roundtrip for preserving YAML styles and tags
-rw-r--r-- | .github/workflows/pythonpackage.yml | 2 | ||||
-rw-r--r-- | .travis.yml | 29 | ||||
-rw-r--r-- | Makefile | 2 | ||||
-rw-r--r-- | README.rst | 79 | ||||
-rw-r--r-- | test/cfn.yaml | 92 | ||||
-rwxr-xr-x | test/test.py | 5 | ||||
-rwxr-xr-x | yq/__init__.py | 123 | ||||
-rw-r--r-- | yq/compat.py | 10 | ||||
-rw-r--r-- | yq/dumper.py | 74 | ||||
-rw-r--r-- | yq/loader.py | 58 | ||||
-rw-r--r-- | yq/parser.py | 73 |
11 files changed, 403 insertions, 144 deletions
diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml index 15c1584..577f150 100644 --- a/.github/workflows/pythonpackage.yml +++ b/.github/workflows/pythonpackage.yml @@ -7,7 +7,7 @@ jobs: runs-on: ubuntu-18.04 strategy: - max-parallel: 4 + max-parallel: 8 matrix: python-version: [2.7, 3.5, 3.6, 3.7, 3.8] diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 619c142..0000000 --- a/.travis.yml +++ /dev/null @@ -1,29 +0,0 @@ -language: python -sudo: required -dist: bionic - -python: - - 2.7 - - 3.5 - - 3.6 - - 3.7 - - 3.8 - -addons: - apt: - packages: - - jq - -before_install: - - pip install --quiet --upgrade pip codecov - -install: - - make install - -script: - - make test - -after_success: - - bash <(curl -s https://codecov.io/bash) - -sudo: false @@ -9,7 +9,7 @@ lint: test_deps ./setup.py flake8 test: test_deps lint - coverage run --source=$$(python setup.py --name) ./test/test.py + coverage run --source=$$(python setup.py --name) ./test/test.py -v init_docs: cd docs; sphinx-quickstart @@ -23,20 +23,81 @@ Like in ``jq``, you can also specify input filename(s) as arguments:: yq .foo.bar input.yml -By default, no conversion of ``jq`` output is done. Use the ``--yaml-output``/``-y`` argument to convert it back into YAML:: +By default, no conversion of ``jq`` output is done. Use the ``--yaml-output``/``-y`` option to convert it back into YAML:: cat input.yml | yq -y .foo.bar -Use the ``--width``/``-w`` argument to pass the line wrap width for string literals. All other command line arguments are -forwarded to ``jq``. ``yq`` forwards the exit code ``jq`` produced, unless there was an error in YAML parsing, in which case -the exit code is 1. See the `jq manual <https://stedolan.github.io/jq/manual/>`_ for more details on ``jq`` features and -options. +Mapping key order is preserved. 
By default, custom YAML `tags <http://www.yaml.org/spec/1.2/spec.html#id2764295>`_ and +`styles <https://yaml.org/spec/current.html#id2509255>`_ in the input are ignored. Use the ``--yaml-roundtrip``/``-Y`` +option to preserve YAML tags and styles by representing them as extra items in their enclosing mappings and sequences +while in JSON:: -YAML `tags <http://www.yaml.org/spec/1.2/spec.html#id2764295>`_ in the input are ignored (any nested data is treated as -untagged). Key order is preserved. + yq -Y .foo.bar input.yml + +Use the ``--width``/``-w`` option to pass the line wrap width for string literals. All other command line arguments +are forwarded to ``jq``. ``yq`` forwards the exit code ``jq`` produced, unless there was an error in YAML parsing, +in which case the exit code is 1. See the `jq manual <https://stedolan.github.io/jq/manual/>`_ for more details on +``jq`` features and options. Because YAML treats JSON as a dialect of YAML, you can use yq to convert JSON to YAML: ``yq -y . < in.json > out.yml``. +Preserving tags and styles using the ``-Y`` (``--yaml-roundtrip``) option +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The ``-Y`` option helps preserve custom `string styles <https://yaml-multiline.info/>`_ and +`tags <https://camel.readthedocs.io/en/latest/yamlref.html#tags>`_ in your document. For example, consider the following +document (an `AWS CloudFormation <https://aws.amazon.com/cloudformation/>`_ template fragment):: + + Resources: + ElasticLoadBalancer: + Type: 'AWS::ElasticLoadBalancing::LoadBalancer' + Properties: + AvailabilityZones: !GetAZs '' + Instances: + - !Ref Ec2Instance1 + - !Ref Ec2Instance2 + Description: >- + Load balancer for Big Important Service. + + Good thing it's managed by this template. 
+ +Passing this document through ``yq -y .Resources.ElasticLoadBalancer`` will drop custom tags, such as ``!Ref``, +and styles, such as the `folded <https://yaml-multiline.info/>`_ style of the ``Description`` field:: + + Type: AWS::ElasticLoadBalancing::LoadBalancer + Properties: + AvailabilityZones: '' + Instances: + - Ec2Instance1 + - Ec2Instance2 + Description: 'Load balancer for Big Important Service. + + Good thing it''s managed by this template.' + +By contrast, passing it through ``yq -Y .Resources.ElasticLoadBalancer`` will preserve tags and styles:: + + Type: 'AWS::ElasticLoadBalancing::LoadBalancer' + Properties: + AvailabilityZones: !GetAZs '' + Instances: + - !Ref 'Ec2Instance1' + - !Ref 'Ec2Instance2' + Description: >- + Load balancer for Big Important Service. + + Good thing it's managed by this template. + +To accomplish this in ``-Y`` mode, yq carries extra metadata (mapping pairs and sequence values) in the JSON +representation of your document for any custom tags or styles that it finds. When converting the JSON back into YAML, it +parses this metadata, re-applies the tags and styles, and discards the extra pairs and values. + +.. warning :: + + The ``-Y`` option is incompatible with jq filters that do not expect the extra information injected into the document + to preserve the YAML formatting. For example, a jq filter that counts entries in the Instances array will come up with + 4 entries instead of 2. A filter that expects all array entries to be mappings may break due to the presence of string + metadata keys. You may need to check your jq filter for compatibility/semantic validity when using the ``-Y`` option. + XML support ----------- ``yq`` also supports XML. The ``yq`` package installs an executable, ``xq``, which @@ -72,8 +133,8 @@ License ------- Licensed under the terms of the `Apache License, Version 2.0 <http://www.apache.org/licenses/LICENSE-2.0>`_. -.. 
image:: https://img.shields.io/travis/kislyuk/yq.svg - :target: https://travis-ci.org/kislyuk/yq +.. image:: https://github.com/kislyuk/yq/workflows/Python%20package/badge.svg + :target: https://github.com/kislyuk/yq/actions .. image:: https://codecov.io/github/kislyuk/yq/coverage.svg?branch=master :target: https://codecov.io/github/kislyuk/yq?branch=master .. image:: https://img.shields.io/pypi/v/yq.svg diff --git a/test/cfn.yaml b/test/cfn.yaml new file mode 100644 index 0000000..393ed3a --- /dev/null +++ b/test/cfn.yaml @@ -0,0 +1,92 @@ +AWSTemplateFormatVersion: '2010-09-09' +Metadata: + License: Apache-2.0 +Description: 'AWS CloudFormation Sample Template S3_Website_With_CloudFront_Distribution: + Sample template showing how to create a website with a custom DNS name, hosted on + Amazon S3 and served via Amazone CloudFront. It assumes you already have a Hosted + Zone registered with Amazon Route 53. **WARNING** This template creates an Amazon + Route 53 DNS record, an S3 bucket and a CloudFront distribution. You will be billed + for the AWS resources used if you create a stack from this template.' +Parameters: + HostedZone: + Type: String + Description: The DNS name of an existing Amazon Route 53 hosted zone + AllowedPattern: (?!-)[a-zA-Z0-9-.]{1,63}(?<!-) + ConstraintDescription: must be a valid DNS zone name. 
+Mappings: + Region2S3WebsiteSuffix: + us-east-1: + Suffix: .s3-website-us-east-1.amazonaws.com + us-west-1: + Suffix: .s3-website-us-west-1.amazonaws.com + us-west-2: + Suffix: .s3-website-us-west-2.amazonaws.com + eu-west-1: + Suffix: .s3-website-eu-west-1.amazonaws.com + ap-northeast-1: + Suffix: .s3-website-ap-northeast-1.amazonaws.com + ap-northeast-2: + Suffix: .s3-website-ap-northeast-2.amazonaws.com + ap-southeast-1: + Suffix: .s3-website-ap-southeast-1.amazonaws.com + ap-southeast-2: + Suffix: .s3-website-ap-southeast-2.amazonaws.com + ap-south-1: + Suffix: .s3-website-ap-south-1.amazonaws.com + us-east-2: + Suffix: .s3-website-us-east-2.amazonaws.com + sa-east-1: + Suffix: .s3-website-sa-east-1.amazonaws.com + cn-north-1: + Suffix: .s3-website.cn-north-1.amazonaws.com.cn + eu-central-1: + Suffix: .s3-website.eu-central-1.amazonaws.com +Resources: + S3BucketForWebsiteContent: + Type: AWS::S3::Bucket + Properties: + AccessControl: PublicRead + WebsiteConfiguration: + IndexDocument: index.html + ErrorDocument: error.html + WebsiteCDN: + Type: AWS::CloudFront::Distribution + Properties: + DistributionConfig: + Comment: CDN for S3-backed website + Aliases: + - !Join ['', [!Ref 'AWS::StackName', !Ref 'AWS::AccountId', ., !Ref 'AWS::Region', + ., !Ref 'HostedZone']] + Enabled: 'true' + DefaultCacheBehavior: + ForwardedValues: + QueryString: 'true' + TargetOriginId: only-origin + ViewerProtocolPolicy: allow-all + DefaultRootObject: index.html + Origins: + - CustomOriginConfig: + HTTPPort: '80' + HTTPSPort: '443' + OriginProtocolPolicy: http-only + DomainName: !Join ['', [!Ref 'S3BucketForWebsiteContent', !FindInMap [ + Region2S3WebsiteSuffix, !Ref 'AWS::Region', Suffix]]] + Id: only-origin + WebsiteDNSName: + Type: AWS::Route53::RecordSet + Properties: + HostedZoneName: !Join ['', [!Ref 'HostedZone', .]] + Comment: CNAME redirect custom name to CloudFront distribution + Name: !Join ['', [!Ref 'AWS::StackName', !Ref 'AWS::AccountId', ., !Ref 'AWS::Region', + ., 
!Ref 'HostedZone']] + Type: CNAME + TTL: '900' + ResourceRecords: + - !GetAtt [WebsiteCDN, DomainName] +Outputs: + WebsiteURL: + Value: !Join ['', ['http://', !Ref 'WebsiteDNSName']] + Description: The URL of the newly created website + BucketName: + Value: !Ref 'S3BucketForWebsiteContent' + Description: Name of S3 bucket to hold website content diff --git a/test/test.py b/test/test.py index 0cc8163..f21bbe2 100755 --- a/test/test.py +++ b/test/test.py @@ -131,6 +131,11 @@ class TestYq(unittest.TestCase): tf.seek(0) self.assertEqual(self.run_yq("", ["-y", ".xyz.foo", self.fd_path(tf)]), 'bar\n...\n') + def test_roundtrip_yaml(self): + cfn_filename = os.path.join(os.path.dirname(__file__), "cfn.yaml") + with io.open(cfn_filename) as fh: + self.assertEqual(self.run_yq("", ["-Y", ".", cfn_filename]), fh.read()) + @unittest.expectedFailure def test_times(self): """ diff --git a/yq/__init__.py b/yq/__init__.py index a33cce1..1dbc99b 100755 --- a/yq/__init__.py +++ b/yq/__init__.py @@ -13,26 +13,11 @@ from datetime import datetime, date, time import yaml -from .version import __version__ - -class Parser(argparse.ArgumentParser): - def print_help(self): - yq_help = argparse.ArgumentParser.format_help(self).splitlines() - print("\n".join(["usage: yq [options] <jq filter> [YAML file...]"] + yq_help[1:] + [""])) - try: - subprocess.check_call(["jq", "--help"]) - except Exception: - pass - -class OrderedLoader(yaml.SafeLoader): - pass - -class OrderedIndentlessDumper(yaml.SafeDumper): - pass - -class OrderedDumper(yaml.SafeDumper): - def increase_indent(self, flow=False, indentless=False): - return super(OrderedDumper, self).increase_indent(flow, False) +from .compat import USING_PYTHON2 +from .parser import get_parser, jq_arg_spec +from .loader import get_loader +from .dumper import get_dumper +from .version import __version__ # noqa class JSONDateTimeEncoder(json.JSONEncoder): def default(self, o): @@ -40,87 +25,12 @@ class JSONDateTimeEncoder(json.JSONEncoder): return 
o.isoformat() return json.JSONEncoder.default(self, o) -def construct_mapping(loader, node): - loader.flatten_mapping(node) - return OrderedDict(loader.construct_pairs(node)) - -def represent_dict_order(dumper, data): - return dumper.represent_mapping("tag:yaml.org,2002:map", data.items()) - def decode_docs(jq_output, json_decoder): while jq_output: doc, pos = json_decoder.raw_decode(jq_output) jq_output = jq_output[pos + 1:] yield doc -def parse_unknown_tags(loader, tag_suffix, node): - if isinstance(node, yaml.nodes.ScalarNode): - return loader.construct_scalar(node) - elif isinstance(node, yaml.nodes.SequenceNode): - return loader.construct_sequence(node) - elif isinstance(node, yaml.nodes.MappingNode): - return construct_mapping(loader, node) - -OrderedLoader.add_constructor(yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, construct_mapping) -OrderedLoader.add_multi_constructor('', parse_unknown_tags) - -for dumper in OrderedIndentlessDumper, OrderedDumper: - dumper.add_representer(OrderedDict, represent_dict_order) - -# jq arguments that consume positionals must be listed here to avoid our parser mistaking them for our positionals -jq_arg_spec = {"--indent": 1, "-f": 1, "--from-file": 1, "-L": 1, "--arg": 2, "--argjson": 2, "--slurpfile": 2, - "--argfile": 2, "--rawfile": 2, "--args": argparse.REMAINDER, "--jsonargs": argparse.REMAINDER} - -# Detection for Python 2 -USING_PYTHON2 = True if sys.version_info < (3, 0) else False - -def get_parser(program_name): - # By default suppress these help strings and only enable them in the specific programs. 
- yaml_output_help, width_help, indentless_help = argparse.SUPPRESS, argparse.SUPPRESS, argparse.SUPPRESS - xml_output_help, xml_dtd_help, xml_root_help = argparse.SUPPRESS, argparse.SUPPRESS, argparse.SUPPRESS - toml_output_help = argparse.SUPPRESS - - if program_name == "yq": - current_language = "YAML" - yaml_output_help = "Transcode jq JSON output back into YAML and emit it" - width_help = "When using --yaml-output, specify string wrap width" - indentless_help = 'When using --yaml-output, indent block style lists (sequences) with 0 spaces instead of 2' - elif program_name == "xq": - current_language = "XML" - xml_output_help = "Transcode jq JSON output back into XML and emit it" - xml_dtd_help = "Preserve XML Document Type Definition (disables streaming of multiple docs)" - xml_root_help = "When transcoding back to XML, envelope the output in an element with this name" - elif program_name == "tq": - current_language = "TOML" - toml_output_help = "Transcode jq JSON output back into TOML and emit it" - else: - raise Exception("Unknown program name") - - description = __doc__.replace("yq", program_name).replace("YAML", current_language) - parser_args = dict(prog=program_name, description=description, formatter_class=argparse.RawTextHelpFormatter) - if sys.version_info >= (3, 5): - parser_args.update(allow_abbrev=False) # required to disambiguate options listed in jq_arg_spec - parser = Parser(**parser_args) - parser.add_argument("--output-format", default="json", help=argparse.SUPPRESS) - parser.add_argument("--yaml-output", "--yml-output", "-y", dest="output_format", action="store_const", const="yaml", - help=yaml_output_help) - parser.add_argument("--width", "-w", type=int, help=width_help) - parser.add_argument("--indentless-lists", "--indentless", action="store_true", help=indentless_help) - parser.add_argument("--xml-output", "-x", dest="output_format", action="store_const", const="xml", - help=xml_output_help) - parser.add_argument("--xml-dtd", 
action="store_true", help=xml_dtd_help) - parser.add_argument("--xml-root", help=xml_root_help) - parser.add_argument("--toml-output", "-t", dest="output_format", action="store_const", const="toml", - help=toml_output_help) - parser.add_argument("--version", action="version", version="%(prog)s {version}".format(version=__version__)) - - for arg in jq_arg_spec: - parser.add_argument(arg, nargs=jq_arg_spec[arg], dest=arg, action="append", help=argparse.SUPPRESS) - - parser.add_argument("jq_filter") - parser.add_argument("input_streams", nargs="*", type=argparse.FileType(), metavar="files", default=[sys.stdin]) - return parser - def xq_cli(): cli(input_format="xml", program_name="xq") @@ -128,16 +38,18 @@ def tq_cli(): cli(input_format="toml", program_name="tq") def cli(args=None, input_format="yaml", program_name="yq"): - parser = get_parser(program_name) + parser = get_parser(program_name, __doc__) args, jq_args = parser.parse_known_args(args=args) for i, arg in enumerate(jq_args): if arg.startswith("-") and not arg.startswith("--"): if "y" in arg: args.output_format = "yaml" + elif "Y" in arg: + args.output_format = "annotated_yaml" elif "x" in arg: args.output_format = "xml" - jq_args[i] = arg.replace("x", "").replace("y", "") + jq_args[i] = arg.replace("x", "").replace("y", "").replace("Y", "") if args.output_format != "json": jq_args[i] = jq_args[i].replace("C", "") if jq_args[i] == "-": @@ -170,8 +82,8 @@ def cli(args=None, input_format="yaml", program_name="yq"): yq(input_format=input_format, program_name=program_name, jq_args=jq_args, **vars(args)) def yq(input_streams=None, output_stream=None, input_format="yaml", output_format="json", - program_name="yq", width=None, indentless_lists=False, xml_root=None, xml_dtd=False, jq_args=frozenset(), - exit_func=None): + program_name="yq", width=None, indentless_lists=False, xml_root=None, xml_dtd=False, + jq_args=frozenset(), exit_func=None): if not input_streams: input_streams = [sys.stdin] if not output_stream: @@ 
-195,10 +107,12 @@ def yq(input_streams=None, output_stream=None, input_format="yaml", output_forma # TODO: enable true streaming in this branch (with asyncio, asyncproc, a multi-shot variant of # subprocess.Popen._communicate, etc.) # See https://stackoverflow.com/questions/375427/non-blocking-read-on-a-subprocess-pipe-in-python + use_annotations = True if output_format == "annotated_yaml" else False input_docs = [] for input_stream in input_streams: if input_format == "yaml": - input_docs.extend(yaml.load_all(input_stream, Loader=OrderedLoader)) + loader = get_loader(use_annotations=use_annotations) + input_docs.extend(yaml.load_all(input_stream, Loader=loader)) elif input_format == "xml": import xmltodict input_docs.append(xmltodict.parse(input_stream.read(), disable_entities=True)) @@ -210,9 +124,9 @@ def yq(input_streams=None, output_stream=None, input_format="yaml", output_forma input_payload = "\n".join(json.dumps(doc, cls=JSONDateTimeEncoder) for doc in input_docs) jq_out, jq_err = jq.communicate(input_payload) json_decoder = json.JSONDecoder(object_pairs_hook=OrderedDict) - if output_format == "yaml": - dumper_class = OrderedIndentlessDumper if indentless_lists else OrderedDumper - yaml.dump_all(decode_docs(jq_out, json_decoder), stream=output_stream, Dumper=dumper_class, + if output_format == "yaml" or output_format == "annotated_yaml": + yaml.dump_all(decode_docs(jq_out, json_decoder), stream=output_stream, + Dumper=get_dumper(use_annotations=use_annotations, indentless=indentless_lists), width=width, allow_unicode=True, default_flow_style=False) elif output_format == "xml": import xmltodict @@ -249,8 +163,9 @@ def yq(input_streams=None, output_stream=None, input_format="yaml", output_forma toml.dump(doc, output_stream) else: if input_format == "yaml": + loader = get_loader(use_annotations=False) for input_stream in input_streams: - for doc in yaml.load_all(input_stream, Loader=OrderedLoader): + for doc in yaml.load_all(input_stream, Loader=loader): 
json.dump(doc, jq.stdin, cls=JSONDateTimeEncoder) jq.stdin.write("\n") elif input_format == "xml": diff --git a/yq/compat.py b/yq/compat.py new file mode 100644 index 0000000..5649909 --- /dev/null +++ b/yq/compat.py @@ -0,0 +1,10 @@ +from __future__ import absolute_import, division, print_function, unicode_literals + +import sys + +USING_PYTHON2 = True if sys.version_info < (3, 0) else False + +if USING_PYTHON2: + str = unicode # noqa +else: + str = str diff --git a/yq/dumper.py b/yq/dumper.py new file mode 100644 index 0000000..575436d --- /dev/null +++ b/yq/dumper.py @@ -0,0 +1,74 @@ +from __future__ import absolute_import, division, print_function, unicode_literals + +import re +from collections import OrderedDict + +import yaml + +from .compat import str +from .loader import hash_key + +class OrderedIndentlessDumper(yaml.SafeDumper): + pass + +class OrderedDumper(yaml.SafeDumper): + def increase_indent(self, flow=False, indentless=False): + return super(OrderedDumper, self).increase_indent(flow, False) + +yaml_value_annotation_re = re.compile(r"^__yq_(?P<type>tag|style)_(?P<key>.+)__$") +yaml_item_annotation_re = re.compile(r"^__yq_(?P<type>tag|style)_(?P<key>\d+)_(?P<value>.+)__$") + +def get_dumper(use_annotations=False, indentless=False): + def represent_dict(dumper, data): + pairs, custom_styles, custom_tags = [], {}, {} + for k, v in data.items(): + if use_annotations and isinstance(k, str): + value_annotation = yaml_value_annotation_re.match(k) + if value_annotation and value_annotation.group("type") == "style": + custom_styles[value_annotation.group("key")] = v + continue + elif value_annotation and value_annotation.group("type") == "tag": + custom_tags[value_annotation.group("key")] = v + continue + pairs.append((k, v)) + mapping = dumper.represent_mapping("tag:yaml.org,2002:map", pairs) + if use_annotations: + for k, v in mapping.value: + hashed_key = hash_key(k.value) + if hashed_key in custom_styles: + if isinstance(v, yaml.nodes.ScalarNode): + 
v.style = custom_styles[hashed_key] + elif custom_styles[hashed_key] == "flow": + v.flow_style = True + if hashed_key in custom_tags: + v.tag = custom_tags[hashed_key] + return mapping + + def represent_list(dumper, data): + raw_list, custom_styles, custom_tags = [], {}, {} + for v in data: + if use_annotations and isinstance(v, str): + annotation = yaml_item_annotation_re.match(v) + if annotation and annotation.group("type") == "style": + custom_styles[annotation.group("key")] = annotation.group("value") + continue + elif annotation and annotation.group("type") == "tag": + custom_tags[annotation.group("key")] = annotation.group("value") + continue + raw_list.append(v) + sequence = dumper.represent_list(raw_list) + if use_annotations: + for i, v in enumerate(sequence.value): + if str(i) in custom_styles: + if isinstance(v, yaml.nodes.ScalarNode): + v.style = custom_styles[str(i)] + elif custom_styles[str(i)] == "flow": + v.flow_style = True + if str(i) in custom_tags: + v.tag = custom_tags[str(i)] + return sequence + + dumper = OrderedIndentlessDumper if indentless else OrderedDumper + dumper.add_representer(OrderedDict, represent_dict) + dumper.add_representer(list, represent_list) + return dumper diff --git a/yq/loader.py b/yq/loader.py new file mode 100644 index 0000000..411ed86 --- /dev/null +++ b/yq/loader.py @@ -0,0 +1,58 @@ +from __future__ import absolute_import, division, print_function, unicode_literals + +import yaml +from base64 import b64encode +from collections import OrderedDict +from hashlib import sha224 + +from .compat import str + +def hash_key(key): + return b64encode(sha224(key.encode() if isinstance(key, str) else key).digest()).decode() + +class OrderedLoader(yaml.SafeLoader): + pass + +def get_loader(use_annotations=False): + def construct_sequence(loader, node): + annotations = [] + for i, v_node in enumerate(node.value): + if not use_annotations: + break + if v_node.tag and v_node.tag.startswith("!") and not v_node.tag.startswith("!!") and 
len(v_node.tag) > 1: + annotations.append("__yq_tag_{}_{}__".format(i, v_node.tag)) + if isinstance(v_node, yaml.nodes.ScalarNode) and v_node.style: + annotations.append("__yq_style_{}_{}__".format(i, v_node.style)) + elif isinstance(v_node, (yaml.nodes.SequenceNode, yaml.nodes.MappingNode)) and v_node.flow_style is True: + annotations.append("__yq_style_{}_{}__".format(i, "flow")) + return [loader.construct_object(i) for i in node.value] + annotations + + def construct_mapping(loader, node): + loader.flatten_mapping(node) # TODO: is this needed? + pairs = [] + for k_node, v_node in node.value: + key = loader.construct_object(k_node) + value = loader.construct_object(v_node) + pairs.append((key, value)) + if not (use_annotations and isinstance(key, (str, bytes))): + continue + if v_node.tag and v_node.tag.startswith("!") and not v_node.tag.startswith("!!") and len(v_node.tag) > 1: + pairs.append(("__yq_tag_{}__".format(hash_key(key)), v_node.tag)) + if isinstance(v_node, yaml.nodes.ScalarNode) and v_node.style: + pairs.append(("__yq_style_{}__".format(hash_key(key)), v_node.style)) + elif isinstance(v_node, (yaml.nodes.SequenceNode, yaml.nodes.MappingNode)) and v_node.flow_style is True: + pairs.append(("__yq_style_{}__".format(hash_key(key)), "flow")) + return OrderedDict(pairs) + + def parse_unknown_tags(loader, tag_suffix, node): + if isinstance(node, yaml.nodes.ScalarNode): + return loader.construct_scalar(node) + elif isinstance(node, yaml.nodes.SequenceNode): + return construct_sequence(loader, node) + elif isinstance(node, yaml.nodes.MappingNode): + return construct_mapping(loader, node) + + OrderedLoader.add_constructor(yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, construct_mapping) + OrderedLoader.add_constructor(yaml.resolver.BaseResolver.DEFAULT_SEQUENCE_TAG, construct_sequence) + OrderedLoader.add_multi_constructor('', parse_unknown_tags) + return OrderedLoader diff --git a/yq/parser.py b/yq/parser.py new file mode 100644 index 0000000..765a35a --- 
/dev/null +++ b/yq/parser.py @@ -0,0 +1,73 @@ +from __future__ import absolute_import, division, print_function, unicode_literals + +import sys, argparse, subprocess + +from .version import __version__ + +# jq arguments that consume positionals must be listed here to avoid our parser mistaking them for our positionals +jq_arg_spec = {"--indent": 1, "-f": 1, "--from-file": 1, "-L": 1, "--arg": 2, "--argjson": 2, "--slurpfile": 2, + "--argfile": 2, "--rawfile": 2, "--args": argparse.REMAINDER, "--jsonargs": argparse.REMAINDER} + +class Parser(argparse.ArgumentParser): + def print_help(self): + yq_help = argparse.ArgumentParser.format_help(self).splitlines() + print("\n".join(["usage: yq [options] <jq filter> [YAML file...]"] + yq_help[1:] + [""])) + try: + subprocess.check_call(["jq", "--help"]) + except Exception: + pass + +def get_parser(program_name, description): + # By default suppress these help strings and only enable them in the specific programs. + yaml_output_help, yaml_roundtrip_help, width_help, indentless_help = (argparse.SUPPRESS, argparse.SUPPRESS, + argparse.SUPPRESS, argparse.SUPPRESS) + xml_output_help, xml_dtd_help, xml_root_help = argparse.SUPPRESS, argparse.SUPPRESS, argparse.SUPPRESS + toml_output_help = argparse.SUPPRESS + + if program_name == "yq": + current_language = "YAML" + yaml_output_help = "Transcode jq JSON output back into YAML and emit it" + yaml_roundtrip_help = """Transcode jq JSON output back into YAML and emit it. +Preserve YAML tags and styles by representing them as extra items +in their enclosing mappings and sequences while in JSON. 
This option +is incompatible with jq filters that do not expect these extra items.""" + width_help = "When using --yaml-output, specify string wrap width" + indentless_help = """When using --yaml-output, indent block style lists (sequences) +with 0 spaces instead of 2""" + elif program_name == "xq": + current_language = "XML" + xml_output_help = "Transcode jq JSON output back into XML and emit it" + xml_dtd_help = "Preserve XML Document Type Definition (disables streaming of multiple docs)" + xml_root_help = "When transcoding back to XML, envelope the output in an element with this name" + elif program_name == "tq": + current_language = "TOML" + toml_output_help = "Transcode jq JSON output back into TOML and emit it" + else: + raise Exception("Unknown program name") + + description = description.replace("yq", program_name).replace("YAML", current_language) + parser_args = dict(prog=program_name, description=description, formatter_class=argparse.RawTextHelpFormatter) + if sys.version_info >= (3, 5): + parser_args.update(allow_abbrev=False) # required to disambiguate options listed in jq_arg_spec + parser = Parser(**parser_args) + parser.add_argument("--output-format", default="json", help=argparse.SUPPRESS) + parser.add_argument("--yaml-output", "--yml-output", "-y", dest="output_format", action="store_const", const="yaml", + help=yaml_output_help) + parser.add_argument("--yaml-roundtrip", "--yml-roundtrip", "-Y", dest="output_format", action="store_const", + const="annotated_yaml", help=yaml_roundtrip_help) + parser.add_argument("--width", "-w", type=int, help=width_help) + parser.add_argument("--indentless-lists", "--indentless", action="store_true", help=indentless_help) + parser.add_argument("--xml-output", "-x", dest="output_format", action="store_const", const="xml", + help=xml_output_help) + parser.add_argument("--xml-dtd", action="store_true", help=xml_dtd_help) + parser.add_argument("--xml-root", help=xml_root_help) + parser.add_argument("--toml-output", 
"-t", dest="output_format", action="store_const", const="toml", + help=toml_output_help) + parser.add_argument("--version", action="version", version="%(prog)s {version}".format(version=__version__)) + + for arg in jq_arg_spec: + parser.add_argument(arg, nargs=jq_arg_spec[arg], dest=arg, action="append", help=argparse.SUPPRESS) + + parser.add_argument("jq_filter") + parser.add_argument("input_streams", nargs="*", type=argparse.FileType(), metavar="files", default=[sys.stdin]) + return parser |