summaryrefslogtreecommitdiffstats
path: root/builtin.c
diff options
context:
space:
mode:
author: pkoppstein <pkoppstein@gmail.com>, 2014-07-31 20:32:44 -0400
committer: Nicolas Williams <nico@cryptonector.com>, 2014-08-08 17:00:14 -0500
commit: a696c6b551879c7a9d16cfaa867c6f1bec57e6f8 (patch)
tree: fbc3d7c676db862eb427c3bad351ddef17023e90 /builtin.c
parent: 0d437e25de7d14dc780fd152e86e0414a027a2f5 (diff)
regex filters (#432): scan, splits, split, sub, gsub
Diffstat (limited to 'builtin.c')
-rw-r--r-- builtin.c 77
1 files changed, 66 insertions, 11 deletions
diff --git a/builtin.c b/builtin.c
index 4fb496cd..8acde3a6 100644
--- a/builtin.c
+++ b/builtin.c
@@ -974,23 +974,78 @@ static const char* const jq_builtins[] = {
"def flatten: reduce .[] as $i ([]; if $i | type == \"array\" then . + ($i | flatten) else . + [$i] end);",
"def flatten(x): x as $x | reduce .[] as $i ([]; if $i | type == \"array\" and $x > 0 then . + ($i | flatten($x-1)) else . + [$i] end);",
"def range(x): x as $x | range(0;$x);",
- // regular expressions:
"def match(re; mode): _match_impl(re; mode; false)|.[];",
"def match(val): (val|type) as $vt | if $vt == \"string\" then match(val; null)"
- " elif $vt == \"array\" and (val | length) > 1 then match(val[0]; val[1])"
- " elif $vt == \"array\" and (val | length) > 0 then match(val[0]; null)"
- " else error( $vt + \" not a string or array\") end;",
+ " elif $vt == \"array\" and (val | length) > 1 then match(val[0]; val[1])"
+ " elif $vt == \"array\" and (val | length) > 0 then match(val[0]; null)"
+ " else error( $vt + \" not a string or array\") end;",
"def test(re; mode): _match_impl(re; mode; true);",
"def test(val): (val|type) as $vt | if $vt == \"string\" then test(val; null)"
- " elif $vt == \"array\" and (val | length) > 1 then test(val[0]; val[1])"
- " elif $vt == \"array\" and (val | length) > 0 then test(val[0]; null)"
- " else error( $vt + \" not a string or array\") end;",
- // Ex.: "a1" | capture( "(?<x>[a-z*])" ).x => "a"
+ " elif $vt == \"array\" and (val | length) > 1 then test(val[0]; val[1])"
+ " elif $vt == \"array\" and (val | length) > 0 then test(val[0]; null)"
+ " else error( $vt + \" not a string or array\") end;",
"def capture(re; mods): match(re; mods) | reduce ( .captures | .[] | select(.name != null) | { (.name) : .string } ) as $pair ({}; . + $pair);",
"def capture(val): (val|type) as $vt | if $vt == \"string\" then capture(val; null)"
- " elif $vt == \"array\" and (val | length) > 1 then capture(val[0]; val[1])"
- " elif $vt == \"array\" and (val | length) > 0 then capture(val[0]; null)"
- " else error( $vt + \" not a string or array\") end;",
+ " elif $vt == \"array\" and (val | length) > 1 then capture(val[0]; val[1])"
+ " elif $vt == \"array\" and (val | length) > 0 then capture(val[0]; null)"
+ " else error( $vt + \" not a string or array\") end;",
+ // scan(re): for every global ("g") match of re in the input, emit the array of
+ // capture-group strings when the match has captures, otherwise the matched string.
+ "def scan(re):"
+ " match(re; \"g\")"
+ " | if (.captures|length > 0)"
+ " then [ .captures | .[] | .string ]"
+ " else .string"
+ " end ;",
+ //
+ // nwise(a; n): emit a stream of successive slices of a, each of length n; the final
+ // slice may be shorter. a may be an array or a string. nwise(n) slices the input itself.
+ "def nwise(a; n): if a|length <= n then a else a[0:n] , nwise(a[n:]; n) end;",
+ "def nwise(n): nwise(.; n);",
+ //
+ // splits(re; flags) emits, as a stream, the substrings of the input that lie between
+ // successive matches of re; splits(re) is the same with null flags.
+ // splits produces a stream; split is retained for backward compatibility.
+ "def splits(re; flags): . as $s"
+ // # "g" is always prepended to flags; multiple occurrences of "g" are acceptable
+ " | [ match(re; \"g\" + flags) | (.offset, .offset + .length) ]"
+ " | [0] + . +[$s|length]"
+ " | nwise(2)"
+ " | $s[.[0]:.[1] ] ;",
+ "def splits(re): splits(re; null);",
+ //
+ // split(re; flags) and split(re) collect the stream produced by splits into a single
+ // array; the array output is kept for backward compatibility.
+ "def split(re; flags): [ splits(re; flags) ];",
+ "def split(re): [ splits(re; null) ];",
+ //
+ // sub(re; s): replace the first match of re in the input string.
+ // If re does not match, the input is emitted unchanged (previously .[0] was null and
+ // iterating its .captures raised an error).
+ // If the match has named captures, a capture object {name: string} is built and piped
+ // to s; otherwise s is evaluated against the input string. In both cases the result of s
+ // is spliced between the text before and the text after the match.
+ "def sub(re; s):"
+ " . as $in"
+ " | [match(re)]"
+ " | if length == 0 then $in else .[0]"
+ " | . as $r"
+ // # create the "capture" object:
+ " | reduce ( $r | .captures | .[] | select(.name != null) | { (.name) : .string } ) as $pair"
+ " ({}; . + $pair)"
+ " | if . == {} then $in | .[0:$r.offset]+s+.[$r.offset+$r.length:]"
+ " else $in[0:$r.offset] + (. | s) + $in[$r.offset+$r.length:]"
+ " end end ;",
+ //
+ // gsub(re; s): repeated substitution of re (which may contain named captures).
+ // All global ("g") matches are collected into $edits and applied by _stredit.
+ "def gsub(re; s):"
+ // # _stredit(edits;s) - s is the "to" string, which might contain capture variables,
+ // # so if an edit contains captures, then create the capture object and pipe it to s.
+ // # Edits are consumed from the last one backwards (edits[$l], then edits[0:$l]).
+ // # NOTE(review): the named-capture branch appears to concatenate only the outputs of s,
+ // # so input text outside the matches looks like it is dropped - confirm this is intended.
+ " def _stredit(edits; s):"
+ " if (edits|length) == 0 then ."
+ " else . as $in"
+ " | (edits|length -1) as $l"
+ " | (edits[$l]) as $edit"
+ // # create the "capture" object:
+ " | ($edit | reduce ( $edit | .captures | .[] | select(.name != null) | { (.name) : .string } ) as $pair"
+ " ({}; . + $pair) )"
+ " | if . == {} then $in | .[0:$edit.offset]+s+.[$edit.offset+$edit.length:] | _stredit(edits[0:$l]; s)"
+ " else (if $l == 0 then \"\" else ($in | _stredit(edits[0:$l]; s)) end) + (. | s)"
+ " end"
+ " end ;"
+ " [match(re;\"g\")] as $edits | _stredit($edits; s) ;",
+
+ //#######################################################################
// range/3, with a `by` expression argument
"def range(init; upto; by): "
" init as $init |"