# Regression tests for the regex builtins (match, test, capture, sub, gsub,
# scan, splits).
#
# Layout: each test is three consecutive lines -- the jq program, the JSON
# input, and the expected JSON output -- followed by a blank line.  Lines
# starting with '#' are comments.
#
# Named capture groups use the Oniguruma syntax (?<name>...); the names are
# what the replacement strings (.x, .head, ...) and the "name" fields in the
# expected output refer to.

# match builtin
[match("( )*"; "g")]
"abc"
[{"offset":0,"length":0,"string":"","captures":[{"offset":0,"string":"","length":0,"name":null}]},{"offset":1,"length":0,"string":"","captures":[{"offset":1,"string":"","length":0,"name":null}]},{"offset":2,"length":0,"string":"","captures":[{"offset":2,"string":"","length":0,"name":null}]},{"offset":3,"length":0,"string":"","captures":[{"offset":3,"string":"","length":0,"name":null}]}]

[match("( )*"; "gn")]
"abc"
[]

[match(""; "g")]
"ab"
[{"offset":0,"length":0,"string":"","captures":[]},{"offset":1,"length":0,"string":"","captures":[]},{"offset":2,"length":0,"string":"","captures":[]}]

# The input below uses precomposed accented letters, so a case-insensitive
# "a" must not match any of them.
[match("a"; "gi")]
"āáàä"
[]

[match(["(bar)"])]
"foo bar"
[{"offset": 4, "length": 3, "string": "bar", "captures":[{"offset": 4, "length": 3, "string": "bar", "name": null}]}]

# NOTE: in the next three tests "ā" is the two-codepoint sequence "a" followed
# by U+0304 (combining macron).  The expected offsets/lengths depend on that;
# do not NFC-normalize these lines.

# offsets account for combining codepoints and multi-byte UTF-8
[match("bar")]
"ā bar with a combining codepoint U+0304"
[{"offset": 3, "length": 3, "string": "bar", "captures":[]}]

# matches with combining codepoints still count them in their length
[match("bār")]
"a bār"
[{"offset": 2, "length": 4, "string": "bār", "captures":[]}]

[match(".+?\\b")]
"ā two-codepoint grapheme"
[{"offset": 0, "length": 2, "string": "ā", "captures":[]}]

# The input has TWO spaces between the last two "foo"s: the second match
# (offset 12, length 8) is "foo  foo" with the optional group not participating.
[match(["foo (?<bar123>bar)? foo", "ig"])]
"foo bar foo foo  foo"
[{"offset": 0, "length": 11, "string": "foo bar foo", "captures":[{"offset": 4, "length": 3, "string": "bar", "name": "bar123"}]},{"offset":12, "length": 8, "string": "foo  foo", "captures":[{"offset": -1, "length": 0, "string": null, "name": "bar123"}]}]

# test builtin
[test("( )*"; "gn")]
"abc"
[false]

[test("ā")]
"ā"
[true]

# capture builtin: named groups become the keys of the result object
capture("(?<a>[a-z]+)-(?<n>[0-9]+)")
"xyzzy-14"
{"a":"xyzzy","n":"14"}

# jq-coded utilities built on match:
#
# The second element in these tests' inputs tests the case where the
# fromstring matches both the head and tail of the string
[.[] | sub(", "; ":")]
["a,b, c, d, e,f", ", a,b, c, d, e,f, "]
["a,b:c, d, e,f",":a,b, c, d, e,f, "]

sub("^(?<head>.)"; "Head=\(.head) Tail=")
"abcdef"
"Head=a Tail=bcdef"

[.[] | gsub(", "; ":")]
["a,b, c, d, e,f",", a,b, c, d, e,f, "]
["a,b:c:d:e,f",":a,b:c:d:e,f:"]

gsub("(?<d>\\d)"; ":\(.d);")
"a1b2"
"a:1;b:2;"

gsub("a";"b")
"aaaaa"
"bbbbb"

gsub("(.*)"; ""; "x")
""
""

# empty matches and anchors
gsub(""; "a"; "g")
""
"a"

gsub("^"; ""; "g")
"a"
"a"

gsub(""; "a"; "g")
"a"
"aaa"

gsub("$"; "a"; "g")
"a"
"aa"

gsub("^"; "a")
""
"a"

gsub("(?=u)"; "u")
"qux"
"quux"

gsub("^.*a"; "b")
"aaa"
"b"

gsub("^.*?a"; "b")
"aaa"
"baa"

# The following is for regression testing and should not be construed as a requirement:
[gsub("a"; "b", "c")]
"a"
["b","c"]

[.[] | scan(", ")]
["a,b, c, d, e,f",", a,b, c, d, e,f, "]
[", ",", ",", ",", ",", ",", ",", ",", "]

[.[]|[[sub(", *";":")], [gsub(", *";":")], [scan(", *")]]]
["a,b, c, d, e,f",", a,b, c, d, e,f, "]
[[["a:b, c, d, e,f"],["a:b:c:d:e:f"],[",",", ",", ",", ",","]],[[":a,b, c, d, e,f, "],[":a:b:c:d:e:f:"],[", ",",",", ",", ",", ",",",", "]]]

[.[]|[[sub(", +";":")], [gsub(", +";":")], [scan(", +")]]]
["a,b, c, d, e,f",", a,b, c, d, e,f, "]
[[["a,b:c, d, e,f"],["a,b:c:d:e,f"],[", ",", ",", "]],[[":a,b, c, d, e,f, "],[":a,b:c:d:e,f:"],[", ",", ",", ",", ",", "]]]

[.[] | scan("b+"; "i")]
["","bBb","abcABBBCabbbc"]
["bBb","b","BBB","bbb"]

# reference to named captures
gsub("(?<x>.)[^a]*"; "+\(.x)-")
"Abcabc"
"+A-+a-"

gsub("(?<x>.)(?<y>[0-9])"; "\(.x|ascii_downcase)\(.y)")
"A1 B2 CD"
"a1 b2 CD"

gsub("\\b(?<x>.)"; "\(.x|ascii_downcase)")
"ABC DEF"
"aBC dEF"

gsub("[^a-z]*(?<x>[a-z]*)"; "Z\(.x)")
"123foo456bar"
"ZfooZbarZ"

# utf-8
sub("(?<x>.)"; "\(.x)!")
"’"
"’!"

# a replacement expression that emits several values yields one result per value
[sub("a"; "b", "c")]
"a"
["b","c"]

[sub("(?<a>.)"; "\(.a|ascii_upcase)", "\(.a|ascii_downcase)", "c")]
"aB"
["AB","aB","cB"]

[gsub("(?<a>.)"; "\(.a|ascii_upcase)", "\(.a|ascii_downcase)", "c")]
"aB"
["AB","ab","cc"]

# splits and _nwise
[splits("")]
"ab"
["","a","b",""]