summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndrey Kislyuk <kislyuk@gmail.com>2019-11-03 14:19:01 -0800
committerAndrey Kislyuk <kislyuk@gmail.com>2019-11-04 10:15:08 -0800
commit7ee1a4e4b3a971618c201024adb3f0aad10cb2db (patch)
treeb889d303ab673befecd4438612d34980aaee6de5
parent1eb5432eb3a85ede1c8f1fab9c69b2b91f5d05fd (diff)
Add -Y/--yaml-roundtrip for preserving YAML styles and tags
-rw-r--r--.github/workflows/pythonpackage.yml2
-rw-r--r--.travis.yml29
-rw-r--r--Makefile2
-rw-r--r--README.rst79
-rw-r--r--test/cfn.yaml92
-rwxr-xr-xtest/test.py5
-rwxr-xr-xyq/__init__.py123
-rw-r--r--yq/compat.py10
-rw-r--r--yq/dumper.py74
-rw-r--r--yq/loader.py58
-rw-r--r--yq/parser.py73
11 files changed, 403 insertions, 144 deletions
diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml
index 15c1584..577f150 100644
--- a/.github/workflows/pythonpackage.yml
+++ b/.github/workflows/pythonpackage.yml
@@ -7,7 +7,7 @@ jobs:
runs-on: ubuntu-18.04
strategy:
- max-parallel: 4
+ max-parallel: 8
matrix:
python-version: [2.7, 3.5, 3.6, 3.7, 3.8]
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 619c142..0000000
--- a/.travis.yml
+++ /dev/null
@@ -1,29 +0,0 @@
-language: python
-sudo: required
-dist: bionic
-
-python:
- - 2.7
- - 3.5
- - 3.6
- - 3.7
- - 3.8
-
-addons:
- apt:
- packages:
- - jq
-
-before_install:
- - pip install --quiet --upgrade pip codecov
-
-install:
- - make install
-
-script:
- - make test
-
-after_success:
- - bash <(curl -s https://codecov.io/bash)
-
-sudo: false
diff --git a/Makefile b/Makefile
index 56d6829..8888f91 100644
--- a/Makefile
+++ b/Makefile
@@ -9,7 +9,7 @@ lint: test_deps
./setup.py flake8
test: test_deps lint
- coverage run --source=$$(python setup.py --name) ./test/test.py
+ coverage run --source=$$(python setup.py --name) ./test/test.py -v
init_docs:
cd docs; sphinx-quickstart
diff --git a/README.rst b/README.rst
index 3682173..f77200f 100644
--- a/README.rst
+++ b/README.rst
@@ -23,20 +23,81 @@ Like in ``jq``, you can also specify input filename(s) as arguments::
yq .foo.bar input.yml
-By default, no conversion of ``jq`` output is done. Use the ``--yaml-output``/``-y`` argument to convert it back into YAML::
+By default, no conversion of ``jq`` output is done. Use the ``--yaml-output``/``-y`` option to convert it back into YAML::
cat input.yml | yq -y .foo.bar
-Use the ``--width``/``-w`` argument to pass the line wrap width for string literals. All other command line arguments are
-forwarded to ``jq``. ``yq`` forwards the exit code ``jq`` produced, unless there was an error in YAML parsing, in which case
-the exit code is 1. See the `jq manual <https://stedolan.github.io/jq/manual/>`_ for more details on ``jq`` features and
-options.
+Mapping key order is preserved. By default, custom YAML `tags <http://www.yaml.org/spec/1.2/spec.html#id2764295>`_ and
+`styles <https://yaml.org/spec/current.html#id2509255>`_ in the input are ignored. Use the ``--yaml-roundtrip``/``-Y``
+option to preserve YAML tags and styles by representing them as extra items in their enclosing mappings and sequences
+while in JSON::
-YAML `tags <http://www.yaml.org/spec/1.2/spec.html#id2764295>`_ in the input are ignored (any nested data is treated as
-untagged). Key order is preserved.
+ yq -Y .foo.bar input.yml
+
+Use the ``--width``/``-w`` option to pass the line wrap width for string literals. All other command line arguments
+are forwarded to ``jq``. ``yq`` forwards the exit code ``jq`` produced, unless there was an error in YAML parsing,
+in which case the exit code is 1. See the `jq manual <https://stedolan.github.io/jq/manual/>`_ for more details on
+``jq`` features and options.
Because YAML treats JSON as a dialect of YAML, you can use yq to convert JSON to YAML: ``yq -y . < in.json > out.yml``.
+Preserving tags and styles using the ``-Y`` (``--yaml-roundtrip``) option
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The ``-Y`` option helps preserve custom `string styles <https://yaml-multiline.info/>`_ and
+`tags <https://camel.readthedocs.io/en/latest/yamlref.html#tags>`_ in your document. For example, consider the following
+document (an `AWS CloudFormation <https://aws.amazon.com/cloudformation/>`_ template fragment)::
+
+ Resources:
+ ElasticLoadBalancer:
+ Type: 'AWS::ElasticLoadBalancing::LoadBalancer'
+ Properties:
+ AvailabilityZones: !GetAZs ''
+ Instances:
+ - !Ref Ec2Instance1
+ - !Ref Ec2Instance2
+ Description: >-
+ Load balancer for Big Important Service.
+
+ Good thing it's managed by this template.
+
+Passing this document through ``yq -y .Resources.ElasticLoadBalancer`` will drop custom tags, such as ``!Ref``,
+and styles, such as the `folded <https://yaml-multiline.info/>`_ style of the ``Description`` field::
+
+ Type: AWS::ElasticLoadBalancing::LoadBalancer
+ Properties:
+ AvailabilityZones: ''
+ Instances:
+ - Ec2Instance1
+ - Ec2Instance2
+ Description: 'Load balancer for Big Important Service.
+
+ Good thing it''s managed by this template.'
+
+By contrast, passing it through ``yq -Y .Resources.ElasticLoadBalancer`` will preserve tags and styles::
+
+ Type: 'AWS::ElasticLoadBalancing::LoadBalancer'
+ Properties:
+ AvailabilityZones: !GetAZs ''
+ Instances:
+ - !Ref 'Ec2Instance1'
+ - !Ref 'Ec2Instance2'
+ Description: >-
+ Load balancer for Big Important Service.
+
+ Good thing it's managed by this template.
+
+To accomplish this in ``-Y`` mode, yq carries extra metadata (mapping pairs and sequence values) in the JSON
+representation of your document for any custom tags or styles that it finds. When converting the JSON back into YAML, it
+parses this metadata, re-applies the tags and styles, and discards the extra pairs and values.
+
+.. warning ::
+
+ The ``-Y`` option is incompatible with jq filters that do not expect the extra information injected into the document
+ to preserve the YAML formatting. For example, a jq filter that counts entries in the Instances array will come up with
+ 4 entries instead of 2. A filter that expects all array entries to be mappings may break due to the presence of string
+ metadata keys. You may need to check your jq filter for compatibility/semantic validity when using the ``-Y`` option.
+
XML support
-----------
``yq`` also supports XML. The ``yq`` package installs an executable, ``xq``, which
@@ -72,8 +133,8 @@ License
-------
Licensed under the terms of the `Apache License, Version 2.0 <http://www.apache.org/licenses/LICENSE-2.0>`_.
-.. image:: https://img.shields.io/travis/kislyuk/yq.svg
- :target: https://travis-ci.org/kislyuk/yq
+.. image:: https://github.com/kislyuk/yq/workflows/Python%20package/badge.svg
+ :target: https://github.com/kislyuk/yq/actions
.. image:: https://codecov.io/github/kislyuk/yq/coverage.svg?branch=master
:target: https://codecov.io/github/kislyuk/yq?branch=master
.. image:: https://img.shields.io/pypi/v/yq.svg
diff --git a/test/cfn.yaml b/test/cfn.yaml
new file mode 100644
index 0000000..393ed3a
--- /dev/null
+++ b/test/cfn.yaml
@@ -0,0 +1,92 @@
+AWSTemplateFormatVersion: '2010-09-09'
+Metadata:
+ License: Apache-2.0
+Description: 'AWS CloudFormation Sample Template S3_Website_With_CloudFront_Distribution:
+ Sample template showing how to create a website with a custom DNS name, hosted on
+ Amazon S3 and served via Amazone CloudFront. It assumes you already have a Hosted
+ Zone registered with Amazon Route 53. **WARNING** This template creates an Amazon
+ Route 53 DNS record, an S3 bucket and a CloudFront distribution. You will be billed
+ for the AWS resources used if you create a stack from this template.'
+Parameters:
+ HostedZone:
+ Type: String
+ Description: The DNS name of an existing Amazon Route 53 hosted zone
+ AllowedPattern: (?!-)[a-zA-Z0-9-.]{1,63}(?<!-)
+ ConstraintDescription: must be a valid DNS zone name.
+Mappings:
+ Region2S3WebsiteSuffix:
+ us-east-1:
+ Suffix: .s3-website-us-east-1.amazonaws.com
+ us-west-1:
+ Suffix: .s3-website-us-west-1.amazonaws.com
+ us-west-2:
+ Suffix: .s3-website-us-west-2.amazonaws.com
+ eu-west-1:
+ Suffix: .s3-website-eu-west-1.amazonaws.com
+ ap-northeast-1:
+ Suffix: .s3-website-ap-northeast-1.amazonaws.com
+ ap-northeast-2:
+ Suffix: .s3-website-ap-northeast-2.amazonaws.com
+ ap-southeast-1:
+ Suffix: .s3-website-ap-southeast-1.amazonaws.com
+ ap-southeast-2:
+ Suffix: .s3-website-ap-southeast-2.amazonaws.com
+ ap-south-1:
+ Suffix: .s3-website-ap-south-1.amazonaws.com
+ us-east-2:
+ Suffix: .s3-website-us-east-2.amazonaws.com
+ sa-east-1:
+ Suffix: .s3-website-sa-east-1.amazonaws.com
+ cn-north-1:
+ Suffix: .s3-website.cn-north-1.amazonaws.com.cn
+ eu-central-1:
+ Suffix: .s3-website.eu-central-1.amazonaws.com
+Resources:
+ S3BucketForWebsiteContent:
+ Type: AWS::S3::Bucket
+ Properties:
+ AccessControl: PublicRead
+ WebsiteConfiguration:
+ IndexDocument: index.html
+ ErrorDocument: error.html
+ WebsiteCDN:
+ Type: AWS::CloudFront::Distribution
+ Properties:
+ DistributionConfig:
+ Comment: CDN for S3-backed website
+ Aliases:
+ - !Join ['', [!Ref 'AWS::StackName', !Ref 'AWS::AccountId', ., !Ref 'AWS::Region',
+ ., !Ref 'HostedZone']]
+ Enabled: 'true'
+ DefaultCacheBehavior:
+ ForwardedValues:
+ QueryString: 'true'
+ TargetOriginId: only-origin
+ ViewerProtocolPolicy: allow-all
+ DefaultRootObject: index.html
+ Origins:
+ - CustomOriginConfig:
+ HTTPPort: '80'
+ HTTPSPort: '443'
+ OriginProtocolPolicy: http-only
+ DomainName: !Join ['', [!Ref 'S3BucketForWebsiteContent', !FindInMap [
+ Region2S3WebsiteSuffix, !Ref 'AWS::Region', Suffix]]]
+ Id: only-origin
+ WebsiteDNSName:
+ Type: AWS::Route53::RecordSet
+ Properties:
+ HostedZoneName: !Join ['', [!Ref 'HostedZone', .]]
+ Comment: CNAME redirect custom name to CloudFront distribution
+ Name: !Join ['', [!Ref 'AWS::StackName', !Ref 'AWS::AccountId', ., !Ref 'AWS::Region',
+ ., !Ref 'HostedZone']]
+ Type: CNAME
+ TTL: '900'
+ ResourceRecords:
+ - !GetAtt [WebsiteCDN, DomainName]
+Outputs:
+ WebsiteURL:
+ Value: !Join ['', ['http://', !Ref 'WebsiteDNSName']]
+ Description: The URL of the newly created website
+ BucketName:
+ Value: !Ref 'S3BucketForWebsiteContent'
+ Description: Name of S3 bucket to hold website content
diff --git a/test/test.py b/test/test.py
index 0cc8163..f21bbe2 100755
--- a/test/test.py
+++ b/test/test.py
@@ -131,6 +131,11 @@ class TestYq(unittest.TestCase):
tf.seek(0)
self.assertEqual(self.run_yq("", ["-y", ".xyz.foo", self.fd_path(tf)]), 'bar\n...\n')
+ def test_roundtrip_yaml(self):
+ cfn_filename = os.path.join(os.path.dirname(__file__), "cfn.yaml")
+ with io.open(cfn_filename) as fh:
+ self.assertEqual(self.run_yq("", ["-Y", ".", cfn_filename]), fh.read())
+
@unittest.expectedFailure
def test_times(self):
"""
diff --git a/yq/__init__.py b/yq/__init__.py
index a33cce1..1dbc99b 100755
--- a/yq/__init__.py
+++ b/yq/__init__.py
@@ -13,26 +13,11 @@ from datetime import datetime, date, time
import yaml
-from .version import __version__
-
-class Parser(argparse.ArgumentParser):
- def print_help(self):
- yq_help = argparse.ArgumentParser.format_help(self).splitlines()
- print("\n".join(["usage: yq [options] <jq filter> [YAML file...]"] + yq_help[1:] + [""]))
- try:
- subprocess.check_call(["jq", "--help"])
- except Exception:
- pass
-
-class OrderedLoader(yaml.SafeLoader):
- pass
-
-class OrderedIndentlessDumper(yaml.SafeDumper):
- pass
-
-class OrderedDumper(yaml.SafeDumper):
- def increase_indent(self, flow=False, indentless=False):
- return super(OrderedDumper, self).increase_indent(flow, False)
+from .compat import USING_PYTHON2
+from .parser import get_parser, jq_arg_spec
+from .loader import get_loader
+from .dumper import get_dumper
+from .version import __version__ # noqa
class JSONDateTimeEncoder(json.JSONEncoder):
def default(self, o):
@@ -40,87 +25,12 @@ class JSONDateTimeEncoder(json.JSONEncoder):
return o.isoformat()
return json.JSONEncoder.default(self, o)
-def construct_mapping(loader, node):
- loader.flatten_mapping(node)
- return OrderedDict(loader.construct_pairs(node))
-
-def represent_dict_order(dumper, data):
- return dumper.represent_mapping("tag:yaml.org,2002:map", data.items())
-
def decode_docs(jq_output, json_decoder):
while jq_output:
doc, pos = json_decoder.raw_decode(jq_output)
jq_output = jq_output[pos + 1:]
yield doc
-def parse_unknown_tags(loader, tag_suffix, node):
- if isinstance(node, yaml.nodes.ScalarNode):
- return loader.construct_scalar(node)
- elif isinstance(node, yaml.nodes.SequenceNode):
- return loader.construct_sequence(node)
- elif isinstance(node, yaml.nodes.MappingNode):
- return construct_mapping(loader, node)
-
-OrderedLoader.add_constructor(yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, construct_mapping)
-OrderedLoader.add_multi_constructor('', parse_unknown_tags)
-
-for dumper in OrderedIndentlessDumper, OrderedDumper:
- dumper.add_representer(OrderedDict, represent_dict_order)
-
-# jq arguments that consume positionals must be listed here to avoid our parser mistaking them for our positionals
-jq_arg_spec = {"--indent": 1, "-f": 1, "--from-file": 1, "-L": 1, "--arg": 2, "--argjson": 2, "--slurpfile": 2,
- "--argfile": 2, "--rawfile": 2, "--args": argparse.REMAINDER, "--jsonargs": argparse.REMAINDER}
-
-# Detection for Python 2
-USING_PYTHON2 = True if sys.version_info < (3, 0) else False
-
-def get_parser(program_name):
- # By default suppress these help strings and only enable them in the specific programs.
- yaml_output_help, width_help, indentless_help = argparse.SUPPRESS, argparse.SUPPRESS, argparse.SUPPRESS
- xml_output_help, xml_dtd_help, xml_root_help = argparse.SUPPRESS, argparse.SUPPRESS, argparse.SUPPRESS
- toml_output_help = argparse.SUPPRESS
-
- if program_name == "yq":
- current_language = "YAML"
- yaml_output_help = "Transcode jq JSON output back into YAML and emit it"
- width_help = "When using --yaml-output, specify string wrap width"
- indentless_help = 'When using --yaml-output, indent block style lists (sequences) with 0 spaces instead of 2'
- elif program_name == "xq":
- current_language = "XML"
- xml_output_help = "Transcode jq JSON output back into XML and emit it"
- xml_dtd_help = "Preserve XML Document Type Definition (disables streaming of multiple docs)"
- xml_root_help = "When transcoding back to XML, envelope the output in an element with this name"
- elif program_name == "tq":
- current_language = "TOML"
- toml_output_help = "Transcode jq JSON output back into TOML and emit it"
- else:
- raise Exception("Unknown program name")
-
- description = __doc__.replace("yq", program_name).replace("YAML", current_language)
- parser_args = dict(prog=program_name, description=description, formatter_class=argparse.RawTextHelpFormatter)
- if sys.version_info >= (3, 5):
- parser_args.update(allow_abbrev=False) # required to disambiguate options listed in jq_arg_spec
- parser = Parser(**parser_args)
- parser.add_argument("--output-format", default="json", help=argparse.SUPPRESS)
- parser.add_argument("--yaml-output", "--yml-output", "-y", dest="output_format", action="store_const", const="yaml",
- help=yaml_output_help)
- parser.add_argument("--width", "-w", type=int, help=width_help)
- parser.add_argument("--indentless-lists", "--indentless", action="store_true", help=indentless_help)
- parser.add_argument("--xml-output", "-x", dest="output_format", action="store_const", const="xml",
- help=xml_output_help)
- parser.add_argument("--xml-dtd", action="store_true", help=xml_dtd_help)
- parser.add_argument("--xml-root", help=xml_root_help)
- parser.add_argument("--toml-output", "-t", dest="output_format", action="store_const", const="toml",
- help=toml_output_help)
- parser.add_argument("--version", action="version", version="%(prog)s {version}".format(version=__version__))
-
- for arg in jq_arg_spec:
- parser.add_argument(arg, nargs=jq_arg_spec[arg], dest=arg, action="append", help=argparse.SUPPRESS)
-
- parser.add_argument("jq_filter")
- parser.add_argument("input_streams", nargs="*", type=argparse.FileType(), metavar="files", default=[sys.stdin])
- return parser
-
def xq_cli():
cli(input_format="xml", program_name="xq")
@@ -128,16 +38,18 @@ def tq_cli():
cli(input_format="toml", program_name="tq")
def cli(args=None, input_format="yaml", program_name="yq"):
- parser = get_parser(program_name)
+ parser = get_parser(program_name, __doc__)
args, jq_args = parser.parse_known_args(args=args)
for i, arg in enumerate(jq_args):
if arg.startswith("-") and not arg.startswith("--"):
if "y" in arg:
args.output_format = "yaml"
+ elif "Y" in arg:
+ args.output_format = "annotated_yaml"
elif "x" in arg:
args.output_format = "xml"
- jq_args[i] = arg.replace("x", "").replace("y", "")
+ jq_args[i] = arg.replace("x", "").replace("y", "").replace("Y", "")
if args.output_format != "json":
jq_args[i] = jq_args[i].replace("C", "")
if jq_args[i] == "-":
@@ -170,8 +82,8 @@ def cli(args=None, input_format="yaml", program_name="yq"):
yq(input_format=input_format, program_name=program_name, jq_args=jq_args, **vars(args))
def yq(input_streams=None, output_stream=None, input_format="yaml", output_format="json",
- program_name="yq", width=None, indentless_lists=False, xml_root=None, xml_dtd=False, jq_args=frozenset(),
- exit_func=None):
+ program_name="yq", width=None, indentless_lists=False, xml_root=None, xml_dtd=False,
+ jq_args=frozenset(), exit_func=None):
if not input_streams:
input_streams = [sys.stdin]
if not output_stream:
@@ -195,10 +107,12 @@ def yq(input_streams=None, output_stream=None, input_format="yaml", output_forma
# TODO: enable true streaming in this branch (with asyncio, asyncproc, a multi-shot variant of
# subprocess.Popen._communicate, etc.)
# See https://stackoverflow.com/questions/375427/non-blocking-read-on-a-subprocess-pipe-in-python
+ use_annotations = True if output_format == "annotated_yaml" else False
input_docs = []
for input_stream in input_streams:
if input_format == "yaml":
- input_docs.extend(yaml.load_all(input_stream, Loader=OrderedLoader))
+ loader = get_loader(use_annotations=use_annotations)
+ input_docs.extend(yaml.load_all(input_stream, Loader=loader))
elif input_format == "xml":
import xmltodict
input_docs.append(xmltodict.parse(input_stream.read(), disable_entities=True))
@@ -210,9 +124,9 @@ def yq(input_streams=None, output_stream=None, input_format="yaml", output_forma
input_payload = "\n".join(json.dumps(doc, cls=JSONDateTimeEncoder) for doc in input_docs)
jq_out, jq_err = jq.communicate(input_payload)
json_decoder = json.JSONDecoder(object_pairs_hook=OrderedDict)
- if output_format == "yaml":
- dumper_class = OrderedIndentlessDumper if indentless_lists else OrderedDumper
- yaml.dump_all(decode_docs(jq_out, json_decoder), stream=output_stream, Dumper=dumper_class,
+ if output_format == "yaml" or output_format == "annotated_yaml":
+ yaml.dump_all(decode_docs(jq_out, json_decoder), stream=output_stream,
+ Dumper=get_dumper(use_annotations=use_annotations, indentless=indentless_lists),
width=width, allow_unicode=True, default_flow_style=False)
elif output_format == "xml":
import xmltodict
@@ -249,8 +163,9 @@ def yq(input_streams=None, output_stream=None, input_format="yaml", output_forma
toml.dump(doc, output_stream)
else:
if input_format == "yaml":
+ loader = get_loader(use_annotations=False)
for input_stream in input_streams:
- for doc in yaml.load_all(input_stream, Loader=OrderedLoader):
+ for doc in yaml.load_all(input_stream, Loader=loader):
json.dump(doc, jq.stdin, cls=JSONDateTimeEncoder)
jq.stdin.write("\n")
elif input_format == "xml":
diff --git a/yq/compat.py b/yq/compat.py
new file mode 100644
index 0000000..5649909
--- /dev/null
+++ b/yq/compat.py
@@ -0,0 +1,10 @@
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import sys
+
+USING_PYTHON2 = True if sys.version_info < (3, 0) else False
+
+if USING_PYTHON2:
+ str = unicode # noqa
+else:
+ str = str
diff --git a/yq/dumper.py b/yq/dumper.py
new file mode 100644
index 0000000..575436d
--- /dev/null
+++ b/yq/dumper.py
@@ -0,0 +1,74 @@
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import re
+from collections import OrderedDict
+
+import yaml
+
+from .compat import str
+from .loader import hash_key
+
+class OrderedIndentlessDumper(yaml.SafeDumper):
+ pass
+
+class OrderedDumper(yaml.SafeDumper):
+ def increase_indent(self, flow=False, indentless=False):
+ return super(OrderedDumper, self).increase_indent(flow, False)
+
+yaml_value_annotation_re = re.compile(r"^__yq_(?P<type>tag|style)_(?P<key>.+)__$")
+yaml_item_annotation_re = re.compile(r"^__yq_(?P<type>tag|style)_(?P<key>\d+)_(?P<value>.+)__$")
+
+def get_dumper(use_annotations=False, indentless=False):
+ def represent_dict(dumper, data):
+ pairs, custom_styles, custom_tags = [], {}, {}
+ for k, v in data.items():
+ if use_annotations and isinstance(k, str):
+ value_annotation = yaml_value_annotation_re.match(k)
+ if value_annotation and value_annotation.group("type") == "style":
+ custom_styles[value_annotation.group("key")] = v
+ continue
+ elif value_annotation and value_annotation.group("type") == "tag":
+ custom_tags[value_annotation.group("key")] = v
+ continue
+ pairs.append((k, v))
+ mapping = dumper.represent_mapping("tag:yaml.org,2002:map", pairs)
+ if use_annotations:
+ for k, v in mapping.value:
+ hashed_key = hash_key(k.value)
+ if hashed_key in custom_styles:
+ if isinstance(v, yaml.nodes.ScalarNode):
+ v.style = custom_styles[hashed_key]
+ elif custom_styles[hashed_key] == "flow":
+ v.flow_style = True
+ if hashed_key in custom_tags:
+ v.tag = custom_tags[hashed_key]
+ return mapping
+
+ def represent_list(dumper, data):
+ raw_list, custom_styles, custom_tags = [], {}, {}
+ for v in data:
+ if use_annotations and isinstance(v, str):
+ annotation = yaml_item_annotation_re.match(v)
+ if annotation and annotation.group("type") == "style":
+ custom_styles[annotation.group("key")] = annotation.group("value")
+ continue
+ elif annotation and annotation.group("type") == "tag":
+ custom_tags[annotation.group("key")] = annotation.group("value")
+ continue
+ raw_list.append(v)
+ sequence = dumper.represent_list(raw_list)
+ if use_annotations:
+ for i, v in enumerate(sequence.value):
+ if str(i) in custom_styles:
+ if isinstance(v, yaml.nodes.ScalarNode):
+ v.style = custom_styles[str(i)]
+ elif custom_styles[str(i)] == "flow":
+ v.flow_style = True
+ if str(i) in custom_tags:
+ v.tag = custom_tags[str(i)]
+ return sequence
+
+ dumper = OrderedIndentlessDumper if indentless else OrderedDumper
+ dumper.add_representer(OrderedDict, represent_dict)
+ dumper.add_representer(list, represent_list)
+ return dumper
diff --git a/yq/loader.py b/yq/loader.py
new file mode 100644
index 0000000..411ed86
--- /dev/null
+++ b/yq/loader.py
@@ -0,0 +1,58 @@
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import yaml
+from base64 import b64encode
+from collections import OrderedDict
+from hashlib import sha224
+
+from .compat import str
+
+def hash_key(key):
+ return b64encode(sha224(key.encode() if isinstance(key, str) else key).digest()).decode()
+
+class OrderedLoader(yaml.SafeLoader):
+ pass
+
+def get_loader(use_annotations=False):
+ def construct_sequence(loader, node):
+ annotations = []
+ for i, v_node in enumerate(node.value):
+ if not use_annotations:
+ break
+ if v_node.tag and v_node.tag.startswith("!") and not v_node.tag.startswith("!!") and len(v_node.tag) > 1:
+ annotations.append("__yq_tag_{}_{}__".format(i, v_node.tag))
+ if isinstance(v_node, yaml.nodes.ScalarNode) and v_node.style:
+ annotations.append("__yq_style_{}_{}__".format(i, v_node.style))
+ elif isinstance(v_node, (yaml.nodes.SequenceNode, yaml.nodes.MappingNode)) and v_node.flow_style is True:
+ annotations.append("__yq_style_{}_{}__".format(i, "flow"))
+ return [loader.construct_object(i) for i in node.value] + annotations
+
+ def construct_mapping(loader, node):
+ loader.flatten_mapping(node) # TODO: is this needed?
+ pairs = []
+ for k_node, v_node in node.value:
+ key = loader.construct_object(k_node)
+ value = loader.construct_object(v_node)
+ pairs.append((key, value))
+ if not (use_annotations and isinstance(key, (str, bytes))):
+ continue
+ if v_node.tag and v_node.tag.startswith("!") and not v_node.tag.startswith("!!") and len(v_node.tag) > 1:
+ pairs.append(("__yq_tag_{}__".format(hash_key(key)), v_node.tag))
+ if isinstance(v_node, yaml.nodes.ScalarNode) and v_node.style:
+ pairs.append(("__yq_style_{}__".format(hash_key(key)), v_node.style))
+ elif isinstance(v_node, (yaml.nodes.SequenceNode, yaml.nodes.MappingNode)) and v_node.flow_style is True:
+ pairs.append(("__yq_style_{}__".format(hash_key(key)), "flow"))
+ return OrderedDict(pairs)
+
+ def parse_unknown_tags(loader, tag_suffix, node):
+ if isinstance(node, yaml.nodes.ScalarNode):
+ return loader.construct_scalar(node)
+ elif isinstance(node, yaml.nodes.SequenceNode):
+ return construct_sequence(loader, node)
+ elif isinstance(node, yaml.nodes.MappingNode):
+ return construct_mapping(loader, node)
+
+ OrderedLoader.add_constructor(yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, construct_mapping)
+ OrderedLoader.add_constructor(yaml.resolver.BaseResolver.DEFAULT_SEQUENCE_TAG, construct_sequence)
+ OrderedLoader.add_multi_constructor('', parse_unknown_tags)
+ return OrderedLoader
diff --git a/yq/parser.py b/yq/parser.py
new file mode 100644
index 0000000..765a35a
--- /dev/null
+++ b/yq/parser.py
@@ -0,0 +1,73 @@
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import sys, argparse, subprocess
+
+from .version import __version__
+
+# jq arguments that consume positionals must be listed here to avoid our parser mistaking them for our positionals
+jq_arg_spec = {"--indent": 1, "-f": 1, "--from-file": 1, "-L": 1, "--arg": 2, "--argjson": 2, "--slurpfile": 2,
+ "--argfile": 2, "--rawfile": 2, "--args": argparse.REMAINDER, "--jsonargs": argparse.REMAINDER}
+
+class Parser(argparse.ArgumentParser):
+ def print_help(self):
+ yq_help = argparse.ArgumentParser.format_help(self).splitlines()
+ print("\n".join(["usage: yq [options] <jq filter> [YAML file...]"] + yq_help[1:] + [""]))
+ try:
+ subprocess.check_call(["jq", "--help"])
+ except Exception:
+ pass
+
+def get_parser(program_name, description):
+ # By default suppress these help strings and only enable them in the specific programs.
+ yaml_output_help, yaml_roundtrip_help, width_help, indentless_help = (argparse.SUPPRESS, argparse.SUPPRESS,
+ argparse.SUPPRESS, argparse.SUPPRESS)
+ xml_output_help, xml_dtd_help, xml_root_help = argparse.SUPPRESS, argparse.SUPPRESS, argparse.SUPPRESS
+ toml_output_help = argparse.SUPPRESS
+
+ if program_name == "yq":
+ current_language = "YAML"
+ yaml_output_help = "Transcode jq JSON output back into YAML and emit it"
+ yaml_roundtrip_help = """Transcode jq JSON output back into YAML and emit it.
+Preserve YAML tags and styles by representing them as extra items
+in their enclosing mappings and sequences while in JSON. This option
+is incompatible with jq filters that do not expect these extra items."""
+ width_help = "When using --yaml-output, specify string wrap width"
+ indentless_help = """When using --yaml-output, indent block style lists (sequences)
+with 0 spaces instead of 2"""
+ elif program_name == "xq":
+ current_language = "XML"
+ xml_output_help = "Transcode jq JSON output back into XML and emit it"
+ xml_dtd_help = "Preserve XML Document Type Definition (disables streaming of multiple docs)"
+ xml_root_help = "When transcoding back to XML, envelope the output in an element with this name"
+ elif program_name == "tq":
+ current_language = "TOML"
+ toml_output_help = "Transcode jq JSON output back into TOML and emit it"
+ else:
+ raise Exception("Unknown program name")
+
+ description = description.replace("yq", program_name).replace("YAML", current_language)
+ parser_args = dict(prog=program_name, description=description, formatter_class=argparse.RawTextHelpFormatter)
+ if sys.version_info >= (3, 5):
+ parser_args.update(allow_abbrev=False) # required to disambiguate options listed in jq_arg_spec
+ parser = Parser(**parser_args)
+ parser.add_argument("--output-format", default="json", help=argparse.SUPPRESS)
+ parser.add_argument("--yaml-output", "--yml-output", "-y", dest="output_format", action="store_const", const="yaml",
+ help=yaml_output_help)
+ parser.add_argument("--yaml-roundtrip", "--yml-roundtrip", "-Y", dest="output_format", action="store_const",
+ const="annotated_yaml", help=yaml_roundtrip_help)
+ parser.add_argument("--width", "-w", type=int, help=width_help)
+ parser.add_argument("--indentless-lists", "--indentless", action="store_true", help=indentless_help)
+ parser.add_argument("--xml-output", "-x", dest="output_format", action="store_const", const="xml",
+ help=xml_output_help)
+ parser.add_argument("--xml-dtd", action="store_true", help=xml_dtd_help)
+ parser.add_argument("--xml-root", help=xml_root_help)
+ parser.add_argument("--toml-output", "-t", dest="output_format", action="store_const", const="toml",
+ help=toml_output_help)
+ parser.add_argument("--version", action="version", version="%(prog)s {version}".format(version=__version__))
+
+ for arg in jq_arg_spec:
+ parser.add_argument(arg, nargs=jq_arg_spec[arg], dest=arg, action="append", help=argparse.SUPPRESS)
+
+ parser.add_argument("jq_filter")
+ parser.add_argument("input_streams", nargs="*", type=argparse.FileType(), metavar="files", default=[sys.stdin])
+ return parser