diff --git a/evaluator/builtin_functions_test.go b/evaluator/builtin_functions_test.go
index 844dee9..4f0b093 100644
--- a/evaluator/builtin_functions_test.go
+++ b/evaluator/builtin_functions_test.go
@@ -736,6 +736,77 @@ func TestPartition(t *testing.T) {
 	testBuiltinFunction(tests, t)
 }
 
+// regex() compiles a pattern; an invalid pattern surfaces the compiler's message
+func TestRegex(t *testing.T) {
+	tests := []Tests{
+		{`r = regex(""); r.type()`, "REGEX"},
+		{`regex("a").str()`, "a"},
+		{`r = regex("a(x+)b"); r.str()`, "a(x+)b"},
+		{`regex(")")`, "error parsing regexp: unexpected ): `)`"},
+	}
+
+	testBuiltinFunction(tests, t)
+}
+
+// matches() accepts both a plain string pattern and a compiled regex
+func TestMatches(t *testing.T) {
+	tests := []Tests{
+		{`r = regex(""); r.matches("")`, true},
+		{`"ax*b".matches("ab")`, true},
+		{`r = regex("ax*b"); r.matches("xaby")`, true},
+		{`"ax*b".matches("ax")`, false},
+		{`r = regex("ax*b"); r.matches("xb")`, false},
+		{`"ax+b".matches("ab")`, false},
+		{`")".matches("x")`, "error parsing regexp: unexpected ): `)`"},
+	}
+
+	testBuiltinFunction(tests, t)
+}
+
+// match() yields the first match followed by its submatches, or null
+func TestMatch(t *testing.T) {
+	tests := []Tests{
+		{`"..".match("ab")`, []string{"ab"}},
+		{`"(.)(.)".match("ab")`, []string{"ab", "a", "b"}},
+		{`r = regex("a(y*)b"); r.match("xaybz")`, []string{"ayb", "y"}},
+		{`"abc".match("xyz")`, nil},
+		{`r = regex("a(y+)b"); r.match("ab")`, nil},
+		{`")".match("x")`, "error parsing regexp: unexpected ): `)`"},
+	}
+
+	testBuiltinFunction(tests, t)
+}
+
+// testBuiltinFunction does not do nested arrays as results, so we get creative
+func TestMatchAll(t *testing.T) {
+	tests := []Tests{
+		{`".".match_all("abc")[2]`, []string{"c"}},
+		{`r = regex("ax*b"); r.match_all("a ab ax xb axb").len()`, 2},
+		{`"a(x*)b".match_all("a ab ax xb axb").len()`, 2},
+		{`"a(x*)b".match_all("a ab ax xb axb")[0]`, []string{"ab", ""}},
+		{`"a(x*)b".match_all("a ab ax xb axb")[1]`, []string{"axb", "x"}},
+		{`")".match_all("x")`, "error parsing regexp: unexpected ): `)`"},
+	}
+
+	testBuiltinFunction(tests, t)
+}
+
+// replace_match() takes either a replacement string or a function called per match
+func TestReplaceMatch(t *testing.T) {
+	tests := []Tests{
+		{`".".replace_match("xyz", ".")`, "..."},
+		{`r = regex("x+"); r.replace_match("x xx xxx", f(x) { x.len() })`, "1 2 3"},
+		{`".".replace_match("a", f(x) {})`, ""},
+		{`r = regex("a(x*)b"); r.replace_match("a ab axb axxxb", ".")`, "a . . ."},
+		{`"a(x*)b".replace_match("a ab axb axxxb", f(x) {x.len()})`, "a 2 3 5"},
+		{`"a(x*)b".replace_match("a ab axb axxxb", len)`, "a 2 3 5"},
+		{`"a(x*)b".replace_match("a ab axb axxxb", "\$1")`, "a x xxx"},
+		{`")".replace_match("x", ".")`, "error parsing regexp: unexpected ): `)`"},
+	}
+
+	testBuiltinFunction(tests, t)
+}
+
 func testBuiltinFunction(tests []Tests, t *testing.T) {
 	for _, tt := range tests {
 		evaluated := testEval(tt.input)
diff --git a/evaluator/functions.go b/evaluator/functions.go
index 25cdbd1..6e18557 100644
--- a/evaluator/functions.go
+++ b/evaluator/functions.go
@@ -13,6 +13,7 @@ import (
 	"os/exec"
 	"os/user"
 	"path/filepath"
+	"regexp"
 	"sort"
 	"strconv"
 	"strings"
@@ -426,6 +427,36 @@ func GetFns() map[string]*object.Builtin {
 			Fn:    lastIndexFn,
 			Doc:   "returns the last position at which a string is found within another string",
 		},
+		// regex("ax+b")
+		"regex": &object.Builtin{
+			Types: []string{object.STRING_OBJ},
+			Fn:    regexFn,
+			Doc:   "returns a string compiled to a regular expression",
+		},
+		// matches(pattern, "abc")
+		"matches": &object.Builtin{
+			Types: []string{object.STRING_OBJ, object.REGEX_OBJ},
+			Fn:    matchesFn,
+			Doc:   "checks whether a string matches a pattern",
+		},
+		// match(pattern, "abc")
+		"match": &object.Builtin{
+			Types: []string{object.STRING_OBJ, object.REGEX_OBJ},
+			Fn:    matchFn,
+			Doc:   "match a pattern against a string and return the first match and all submatches",
+		},
+		// match_all(pattern, "abc")
+		"match_all": &object.Builtin{
+			Types: []string{object.STRING_OBJ, object.REGEX_OBJ},
+			Fn:    matchAllFn,
+			Doc:   "match a pattern against a string and return the all matches and their submatches",
+		},
+		// replace_match(pattern, "abc", "repl") or replace_match(pattern, "abc", fn)
+		"replace_match": &object.Builtin{
+			Types: []string{object.STRING_OBJ, object.REGEX_OBJ},
+			Fn:    replaceMatchFn,
+			Doc:   "replace all matches of a pattern in a string",
+		},
 		// shift([1,2,3])
 		"shift": &object.Builtin{
 			Types: []string{object.ARRAY_OBJ},
@@ -1971,6 +2002,187 @@ func lastIndexFn(tok token.Token, env *object.Environment, args ...object.Object
 	return &object.Number{Token: tok, Value: float64(i)}
 }
 
+// regex("ax+b")
+//
+// regexFn compiles its single string argument with regexp.Compile and
+// wraps the result, together with the original pattern text, in an
+// object.Regex. A pattern that fails to compile yields an error object
+// carrying the compiler's message.
+func regexFn(tok token.Token, env *object.Environment, args ...object.Object) object.Object {
+	err := validateArgs(tok, "regex", args, 1, [][]string{{object.STRING_OBJ}})
+	if err != nil {
+		return err
+	}
+
+	source := args[0].(*object.String).Value
+	re, nok := regexp.Compile(source)
+	if nok != nil {
+		return newError(tok, "%s", nok.Error())
+	}
+
+	return &object.Regex{Token: tok, Value: re, Source: source}
+}
+
+// getRegexpObj returns obj unchanged when it already is a compiled
+// regex, and compiles it first when it is a string pattern. Any other
+// type, or a pattern that fails to compile, yields an error object.
+// tok is the calling builtin's token; it is attached to every object
+// created here.
+func getRegexpObj(tok token.Token, obj object.Object) object.Object {
+	switch obj.Type() {
+	case object.REGEX_OBJ:
+		return obj
+	case object.STRING_OBJ:
+		source := obj.(*object.String).Value
+		re, err := regexp.Compile(source)
+		if err != nil {
+			return newError(tok, "%s", err)
+		}
+		return &object.Regex{Token: tok, Value: re, Source: source}
+	default:
+		return newError(tok, "invalid argument type") // should not get here
+	}
+}
+
+// matches(pattern, "abc")
+//
+// matchesFn reports, as a boolean object, whether the pattern in
+// args[0] (a string or a compiled regex) matches anywhere in the
+// string args[1].
+func matchesFn(tok token.Token, env *object.Environment, args ...object.Object) object.Object {
+	err := validateArgs(tok, "matches", args, 2, [][]string{{object.STRING_OBJ, object.REGEX_OBJ}, {object.STRING_OBJ}})
+	if err != nil {
+		return err
+	}
+
+	re := getRegexpObj(tok, args[0])
+	if isError(re) {
+		return re
+	}
+
+	m := re.(*object.Regex).Value.MatchString(args[1].(*object.String).Value)
+
+	return &object.Boolean{Token: tok, Value: m}
+}
+
+// match(pattern, "abc")
+//
+// matchFn returns an array holding the first match of the pattern
+// args[0] in the string args[1], followed by its submatches, or NULL
+// when the pattern does not match.
+func matchFn(tok token.Token, env *object.Environment, args ...object.Object) object.Object {
+	err := validateArgs(tok, "match", args, 2, [][]string{{object.STRING_OBJ, object.REGEX_OBJ}, {object.STRING_OBJ}})
+	if err != nil {
+		return err
+	}
+
+	re := getRegexpObj(tok, args[0])
+	if isError(re) {
+		return re
+	}
+
+	match := re.(*object.Regex).Value.FindStringSubmatch(args[1].(*object.String).Value)
+	if len(match) == 0 {
+		return NULL
+	}
+
+	newElements := make([]object.Object, len(match))
+	for i, s := range match {
+		newElements[i] = &object.String{Token: tok, Value: s}
+	}
+
+	return &object.Array{Token: tok, Elements: newElements}
+}
+
+// match_all(pattern, "abc")
+//
+// matchAllFn returns an array with one entry per match of the pattern
+// args[0] in the string args[1]; each entry is itself an array holding
+// the match followed by its submatches. It returns NULL when there is
+// no match at all.
+func matchAllFn(tok token.Token, env *object.Environment, args ...object.Object) object.Object {
+	err := validateArgs(tok, "match_all", args, 2, [][]string{{object.STRING_OBJ, object.REGEX_OBJ}, {object.STRING_OBJ}})
+	if err != nil {
+		return err
+	}
+
+	re := getRegexpObj(tok, args[0])
+	if isError(re) {
+		return re
+	}
+
+	allMatches := re.(*object.Regex).Value.FindAllStringSubmatch(args[1].(*object.String).Value, -1)
+	if len(allMatches) == 0 {
+		return NULL
+	}
+
+	newMatches := make([]object.Object, len(allMatches))
+	for i, match := range allMatches {
+		newElements := make([]object.Object, len(match))
+		for j, s := range match {
+			newElements[j] = &object.String{Token: tok, Value: s}
+		}
+
+		newMatches[i] = &object.Array{Token: tok, Elements: newElements}
+	}
+
+	return &object.Array{Token: tok, Elements: newMatches}
+}
+
+// replace_match(pattern, "abc", "repl") or replace_match(pattern, "abc", fn)
+//
+// replaceMatchFn replaces every match of the pattern args[0] in the
+// string args[1]. A string replacement is handed to ReplaceAllString,
+// so $1-style references expand to submatches. A function replacement
+// is called once per match with the matched text, and the Inspect()
+// form of its result is substituted (nothing when it returns nil). If
+// a call produces an error object, that error is returned instead of
+// being spliced into the result string.
+func replaceMatchFn(tok token.Token, env *object.Environment, args ...object.Object) object.Object {
+	err := validateArgs(tok, "replace_match", args, 3, [][]string{{object.STRING_OBJ, object.REGEX_OBJ}, {object.STRING_OBJ}, {object.STRING_OBJ, object.FUNCTION_OBJ, object.BUILTIN_OBJ}})
+	if err != nil {
+		return err
+	}
+
+	re := getRegexpObj(tok, args[0])
+	if isError(re) {
+		return re
+	}
+
+	pattern := re.(*object.Regex).Value
+	subject := args[1].(*object.String).Value
+
+	if args[2].Type() == object.STRING_OBJ {
+		s := pattern.ReplaceAllString(subject, args[2].(*object.String).Value)
+		return &object.String{Token: tok, Value: s}
+	}
+
+	// First error object produced by the callback, if any. Once set, the
+	// remaining matches are left untouched and the callback is not run.
+	var failure object.Object
+	replacefn := func(m string) string {
+		if failure != nil {
+			return m
+		}
+		r := applyFunction(tok, args[2], env, []object.Object{&object.String{Token: tok, Value: m}})
+		if r == nil {
+			return ""
+		}
+		if isError(r) {
+			failure = r
+			return m
+		}
+		return r.Inspect()
+	}
+
+	s := pattern.ReplaceAllStringFunc(subject, replacefn)
+	if failure != nil {
+		return failure
+	}
+
+	return &object.String{Token: tok, Value: s}
+}
+
 // Clamps start and end arguments to the slice
 // function. When you slice "abc" you can have
 // start 10 and end -20...
diff --git a/object/object.go b/object/object.go
index b4b5776..ec7ac33 100644
--- a/object/object.go
+++ b/object/object.go
@@ -4,6 +4,7 @@ import (
 	"bytes"
 	"fmt"
 	"os/exec"
+	"regexp"
 	"sort"
 	"strconv"
 	"strings"
@@ -24,6 +25,7 @@ const (
 	NUMBER_OBJ  = "NUMBER"
 	BOOLEAN_OBJ = "BOOLEAN"
 	STRING_OBJ  = "STRING"
+	REGEX_OBJ   = "REGEX"
 
 	RETURN_VALUE_OBJ = "RETURN_VALUE"
 
@@ -229,6 +231,28 @@ func (s *String) HashKey() HashKey {
 	return HashKey{Type: s.Type(), Value: s.Value}
 }
 
+// Regex is a compiled regular expression value. Value holds the
+// compiled form and Source the pattern text it was compiled from.
+type Regex struct {
+	Token  token.Token
+	Value  *regexp.Regexp
+	Source string
+}
+
+// Type returns REGEX_OBJ.
+func (r *Regex) Type() ObjectType { return REGEX_OBJ }
+
+// Inspect returns the pattern text the regex was compiled from.
+func (r *Regex) Inspect() string { return r.Source }
+
+// Json returns the pattern text wrapped in double quotes, with
+// backslashes and double quotes escaped. Backslashes must be escaped
+// too: regex sources routinely contain sequences such as \d, which
+// are not valid JSON escapes when emitted verbatim.
+func (r *Regex) Json() string {
+	return `"` + strings.NewReplacer(`\`, `\\`, `"`, `\"`).Replace(r.Inspect()) + `"`
+}
+
 // Function that ensure a mutex
 // instance is created on the
 // string