diff --git a/src/parse/asp/builtins.go b/src/parse/asp/builtins.go index c34479f30..657d2783f 100644 --- a/src/parse/asp/builtins.go +++ b/src/parse/asp/builtins.go @@ -25,6 +25,24 @@ import ( // A nativeFunc is a function that implements a builtin function natively. type nativeFunc func(*scope, []pyObject) pyObject +// A strFormatRefMode is a mode by which the replacement fields in a str.format format string may refer to the remaining +// arguments passed to str.format. +type strFormatRefMode int + +const ( + // strFormatByOrder indicates that replacement fields refer to positional arguments in the order in which they were + // given to str.format. Fields are numbered automatically; the field names themselves are therefore empty (i.e. `{}`). + strFormatByOrder strFormatRefMode = iota + + // strFormatByPosition indicates that replacement fields refer to positional arguments in an arbitrary order. Field + // names are numerical and zero-indexed (e.g. `{0}`, `{1}`). + strFormatByPosition + + // strFormatByName indicates that replacement fields refer to the names of keyword arguments. Field names are therefore + // function parameters (e.g. `{name}`, `{id}`). + strFormatByName +) + // registerBuiltins sets up the "special" builtins that map to native code. func registerBuiltins(s *scope) { const varargs = true @@ -582,48 +600,97 @@ func strRFind(s *scope, args []pyObject) pyObject { return newPyInt(strings.LastIndex(string(self), string(needle))) } +// strFormat implements the str.format function. It interpolates a format string using the arguments passed to +// str.format and returns the interpolated string. +// +// Format strings may contain literal text or replacement fields delimited by `{` and `}` characters. The string between +// these delimiters - the "field name" - denotes which argument to str.format should be substituted in place of the +// field. strFormat supports three reference modes within field names - see [strFormatByOrder], [strFormatByPosition] +// and [strFormatByName]. Only one reference mode may be used per format string. `{{` and `}}` are treated as the +// literal characters `{` and `}` respectively. func strFormat(s *scope, args []pyObject) pyObject { self := string(args[0].(pyString)) var buf strings.Builder - buf.Grow(len(self)) // most reasonable guess available as to how big it might be + buf.Grow(len(self)) // This is our best possible guess as to how big the output might be. + + // Decide initially whether to parse replacement field names as numbers (which is only permitted if str.format is + // called with positional arguments) or as names (which is only permitted if it is called with keyword arguments). + // + // Although we also support automatic field numbering, both that and numerical replacement fields require str.format to + // be called with positional arguments, so it is impossible to tell whether we should use automatic field numbering + // until we parse the first field name in the format string. args will always contain at least one element (the format + // string). For now, assume we're using numerical replacement fields if it contains more than one element; otherwise, + // assume we're using named replacement fields. + mode := strFormatByPosition + if len(args) == 1 { + mode = strFormatByName + } + + posArgs := len(args) - 1 + nextPosIndex := 0 - arg := 1 // what arg index are we up to for positional args + // This loop consumes the format string from left to right: for { - start := strings.IndexByte(self, '{') + // Locate the first (opening or closing) delimiter in the remainder of the format string. + start := strings.IndexAny(self, "{}") + // If there are no more delimiters remaining in the format string, output the rest of the format string and end + // processing of the format string. if start == -1 { buf.WriteString(self) break } - end := strings.IndexByte(self[start:], '}') - if end == -1 { - // We may want to error here in some future revision - buf.WriteString(self) - break + // If this delimiter is an escape character for a delimiter that immediately follows it, output only one of them and + // return to the start of the loop to continue processing the format string. + if start < len(self)-1 && self[start] == self[start+1] { + buf.WriteString(self[:start+1]) + self = self[start+2:] + continue } + // We now know we're about to begin parsing a field name, which means this character must be the "{" delimiter. + s.Assert(self[start] == '{', "single '}' encountered in format string") + // Find the corresponding "}" delimiter... + end := strings.IndexByte(self[start+1:], '}') + // ...and if there isn't one, this must be a malformed format string. + s.Assert(end != -1, "single '{' encountered in format string") + // Locate the "}" delimiter relative to the start of the format string, rather than to the first "{" delimiter. + end = start + end + 1 + // Output everything in the format string up to, but not including, the first "{" delimiter. buf.WriteString(self[:start]) - end = start + end - if start > 0 && self[start-1] == '$' { - // Don't interpolate ${X} (but ${{X}} -> ${X}) - if self[start+1] == '{' { - buf.WriteString(self[start+1 : end]) - } else if start == end-1 { - // ${} interpolates as $ + positional arg - s.Assert(arg < len(args), "format string specifies at least %d positional arguments, but only %d were supplied", arg, len(args)-1) - buf.WriteString(args[arg].String()) - arg++ - } else { - buf.WriteString(self[start : end+1]) - } - } else if key := self[start+1 : end]; key == "" { - s.Assert(arg < len(args), "format string specifies at least %d positional arguments, but only %d were supplied", arg, len(args)-1) - buf.WriteString(args[arg].String()) - arg++ - } else if val, present := s.locals[key]; present { + // Extract the replacement field's name, excluding the delimiters. + fieldName := self[start+1 : end] + // Now we can decide whether to rely on automatic field numbering. This is a one-time decision that is only taken if + // str.format was called with positional arguments and the first field name we encounter is empty; after this, empty + // field names are otherwise format string errors. + if fieldName == "" && mode == strFormatByPosition && nextPosIndex == 0 { + mode = strFormatByOrder + } + // Decide how to interpret this field name: + switch mode { + case strFormatByOrder: + // With automatic field numbering, output the next positional argument. Field names must be empty, and there cannot + // be more of them than there are positional arguments (although there may ultimately be fewer). Internally, the + // first positional argument is the format string - the positional arguments from the caller's perspective are + // shifted one to the right in args. + s.Assert(fieldName == "", "cannot switch from automatic field numbering to manual field specification") + s.Assert(nextPosIndex < posArgs, "replacement index %d out of range for positional arguments", nextPosIndex) + buf.WriteString(args[nextPosIndex+1].String()) + nextPosIndex++ + case strFormatByPosition: + // With numerical replacement fields, parse the field name as an integer i and output the string value of the + // (zero-indexed) i'th positional argument. Internally, the first positional argument is the format string - the + // positional arguments from the caller's perspective are shifted one to the right in args. + i, err := strconv.Atoi(fieldName) + s.Assert(err == nil, "must use numerical replacement fields with positional arguments") + s.Assert(0 <= i && i < posArgs, "replacement index %d out of range for positional arguments", i) + buf.WriteString(args[i+1].String()) + case strFormatByName: + // With named replacement fields, output the string value of the keyword argument with the given name. + s.Assert(fieldName != "", "must use named replacement fields with keyword arguments") + val, exists := s.locals[fieldName] + s.Assert(exists, "unspecified keyword argument '%s'", fieldName) buf.WriteString(val.String()) - } else { - // We may want to error here in some future revision - buf.WriteString(self[start : end+1]) } + // Advance beyond the closing delimiter for this field, and parse what remains in the format string. self = self[end+1:] } return pyString(buf.String()) diff --git a/src/parse/asp/builtins_bench_test.go b/src/parse/asp/builtins_bench_test.go index 9b827379b..db63aa0b5 100644 --- a/src/parse/asp/builtins_bench_test.go +++ b/src/parse/asp/builtins_bench_test.go @@ -7,12 +7,16 @@ import ( func BenchmarkStrFormat(b *testing.B) { s := &scope{ locals: map[string]pyObject{ - "spam": pyString("abc"), - "eggs": pyString("def"), + "one": pyString("123"), + "two": pyString("456"), + "spam": pyString("abc"), + "eggs": pyString("def"), + "wibble": pyString("ghi"), + "wobble": pyString("jkl"), }, } args := []pyObject{ - pyString("test {} {spam} ${wibble} {} {eggs} {wobble}"), pyString("123"), pyString("456"), + pyString("test {one} {spam} ${wibble} {two} {eggs} {wobble}"), } b.ReportAllocs() @@ -52,11 +56,11 @@ export PKG=$PKG export NAME=$(echo $NAME | sed -E 's/^_(.*)#.*$/\1/') EOF cat << 'EOF' >> $OUT -DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -export TF_DATA_DIR="/tmp/plz/terraform/${PKG}_${NAME}" +DIR="$( cd "$( dirname "${{BASH_SOURCE[0]}}" )" >/dev/null 2>&1 && pwd )" +export TF_DATA_DIR="/tmp/plz/terraform/${{PKG}}_${{NAME}}" >&2 echo "-> using $TF_DATA_DIR" rm -rf "$TF_DATA_DIR" && mkdir -p "$TF_DATA_DIR" -trap "rm -rf ${TF_DATA_DIR}" EXIT +trap "rm -rf ${{TF_DATA_DIR}}" EXIT tar xfz $DIR/$(location {tarball_target}) -C $TF_DATA_DIR mkdir -p $TF_DATA_DIR/.terraform.d/plugins/linux_amd64 {plugins_extract_script} @@ -76,9 +80,9 @@ else fi if [ -n "$tfregistry_token" ]; then cat < $TF_DATA_DIR/.terraformrc -credentials "terraform.external.thoughtmachine.io" { +credentials "terraform.external.thoughtmachine.io" {{ token = "$tfregistry_token" -} +}} EOC else cat <&2 @@ -91,8 +95,8 @@ else WARN fi -export TF_CLI_ARGS_plan="${TF_CLI_ARGS_plan:-} -lock-timeout=60s" -export TF_CLI_ARGS_apply="${TF_CLI_ARGS_apply:-} -lock-timeout=60s" +export TF_CLI_ARGS_plan="${{TF_CLI_ARGS_plan:-}} -lock-timeout=60s" +export TF_CLI_ARGS_apply="${{TF_CLI_ARGS_apply:-}} -lock-timeout=60s" export CWD=$(pwd) TERRAFORM_CLI="$CWD/$(out_location {terraform_cli})" var_file_paths=({var_file_paths_csv}) @@ -102,7 +106,7 @@ then TERRAFORM_CLI="$CWD/$(location {terraform_cli})" var_file_paths=({var_file_paths_csv_sandbox}) data_paths=({data_paths_csv_sandbox}) -trap 'echo -n $? 1>&3 && rm -rf ${TF_DATA_DIR} && exit 0' EXIT +trap 'echo -n $? 1>&3 && rm -rf ${{TF_DATA_DIR}} && exit 0' EXIT fi var_flags="" for i in "${{!var_file_paths[@]}}" diff --git a/src/parse/asp/builtins_test.go b/src/parse/asp/builtins_test.go index dbe3764f6..4777fedf6 100644 --- a/src/parse/asp/builtins_test.go +++ b/src/parse/asp/builtins_test.go @@ -53,57 +53,6 @@ func TestTag(t *testing.T) { assert.Equal(t, res.String(), "_name#foo_bar") } -func TestStrFormat(t *testing.T) { - s := &scope{ - locals: map[string]pyObject{ - "spam": pyString("abc"), - "eggs": pyString("def"), - }, - } - - assert.EqualValues(t, "test 123 abc ${wibble} 456 def {wobble}", strFormat(s, []pyObject{ - pyString("test {} {spam} ${wibble} {} {eggs} {wobble}"), pyString("123"), pyString("456"), - })) -} - -func TestStrFormat2(t *testing.T) { - s := &scope{ - locals: map[string]pyObject{ - "owner": pyString("please-build"), - "plugin": pyString("java-rules"), - "revision": pyString("v0.3.0"), - }, - } - - assert.EqualValues(t, "https://github.com/please-build/java-rules/archive/v0.3.0.zip", strFormat(s, []pyObject{ - pyString("https://github.com/{owner}/{plugin}/archive/{revision}.zip"), - })) -} - -func TestStrFormat3(t *testing.T) { - s := &scope{ - locals: map[string]pyObject{ - "url_base": pyString("https://please.build/py-wheels"), - "package_name": pyString("coverage"), - "version": pyString("5.5"), - }, - } - - assert.EqualValues(t, "https://please.build/py-wheels/coverage-5.5-${OS}_${ARCH}.whl", strFormat(s, []pyObject{ - pyString("{url_base}/{package_name}-{version}-${{OS}}_${{ARCH}}.whl"), - })) -} - -func TestStrFormat4(t *testing.T) { - s := &scope{ - locals: map[string]pyObject{}, - } - - assert.EqualValues(t, `echo "tools/images/please_ubuntu@$please_ubuntu_digest" > $OUT`, strFormat(s, []pyObject{ - pyString(`echo "{}@${}" > $OUT`), pyString("tools/images/please_ubuntu"), pyString("please_ubuntu_digest"), - })) -} - func TestObjLen(t *testing.T) { l := pyList{pyInt(1)} assert.EqualValues(t, 1, objLen(l)) diff --git a/src/parse/asp/interpreter_test.go b/src/parse/asp/interpreter_test.go index 75c18f76c..aac7fa18c 100644 --- a/src/parse/asp/interpreter_test.go +++ b/src/parse/asp/interpreter_test.go @@ -5,7 +5,9 @@ package asp import ( "fmt" + iofs "io/fs" "testing" + "testing/fstest" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -14,11 +16,11 @@ import ( "github.com/thought-machine/please/src/core" ) -func parseFileToStatements(filename string) (*scope, []*Statement, error) { - return parseFileToStatementsInPkg(filename, core.NewPackage("test/package")) +func parseFileToStatements(fs iofs.FS, filename string) (*scope, []*Statement, error) { + return parseFileToStatementsInPkg(fs, filename, core.NewPackage("test/package")) } -func parseFileToStatementsInPkg(filename string, pkg *core.Package) (*scope, []*Statement, error) { +func parseFileToStatementsInPkg(fs iofs.FS, filename string, pkg *core.Package) (*scope, []*Statement, error) { state := core.NewDefaultBuildState() state.Config.BuildConfig = map[string]string{"parser-engine": "python27"} parser := NewParser(state) @@ -28,7 +30,7 @@ func parseFileToStatementsInPkg(filename string, pkg *core.Package) (*scope, []* panic(err) } parser.MustLoadBuiltins("builtins.build_defs", src) - statements, err := parser.parse(nil, filename) + statements, err := parser.parse(fs, filename) if err != nil { panic(err) } @@ -39,7 +41,15 @@ func parseFileToStatementsInPkg(filename string, pkg *core.Package) (*scope, []* } func parseFile(filename string) (*scope, error) { - s, _, err := parseFileToStatements(filename) + s, _, err := parseFileToStatements(nil, filename) + return s, err +} + +func parseString(str string) (*scope, error) { + s, _, err := parseFileToStatements( + fstest.MapFS{"test.build": {Data: []byte(str)}}, + "test.build", + ) return s, err } @@ -307,7 +317,7 @@ func TestInterpreterArgumentCompatibility(t *testing.T) { } func TestInterpreterOptimiseConfig(t *testing.T) { - s, statements, err := parseFileToStatements("src/parse/asp/test_data/interpreter/optimise_config.build") + s, statements, err := parseFileToStatements(nil, "src/parse/asp/test_data/interpreter/optimise_config.build") assert.NoError(t, err) assert.Equal(t, 1, len(statements)) assert.NotNil(t, statements[0].Ident) @@ -344,6 +354,201 @@ func TestInterpreterFStrings(t *testing.T) { assert.EqualValues(t, "6", s.Lookup("nest_test")) } +func TestInterpreterStringFormat(t *testing.T) { + tests := map[string]map[string]struct { + FormatStr, Args string + Expected string + Error string + }{ + "Edge cases": { + "Empty format string, positional arguments": { + FormatStr: ``, + Args: `"one", "two", "three"`, + Expected: "", + }, + "Empty format string, keyword arguments": { + FormatStr: ``, + Args: `zero="a", one="b", two="c"`, + Expected: "", + }, + "Escaped opening delimiter before replacement field": { + FormatStr: `{} {{{} {}`, + Args: `"one", "two", "three"`, + Expected: "one {two three", + }, + "Escaped opening delimiter after replacement field": { + FormatStr: `{} {}{{ {}`, + Args: `"one", "two", "three"`, + Expected: "one two{ three", + }, + "Escaped closing delimiter before replacement field": { + FormatStr: `{} }}{} {}`, + Args: `"one", "two", "three"`, + Expected: "one }two three", + }, + "Escaped closing delimiter after replacement field": { + FormatStr: `{} {}}} {}`, + Args: `"one", "two", "three"`, + Expected: "one two} three", + }, + "Repeated escaped opening delimiters": { + FormatStr: `{} {{{{{{{{{} {}`, + Args: `"one", "two", "three"`, + Expected: "one {{{{two three", + }, + "Repeated escaped closing delimiters": { + FormatStr: `{} }}}}}}}}{} {}`, + Args: `"one", "two", "three"`, + Expected: "one }}}}two three", + }, + }, + "Format string errors": { + // This manifests as a dangling closing delimiter at character 3, because the opening "{{" is interpreted as an + // escaped "{" character. + "Dangling opening delimiter at start of string": { + FormatStr: `{{} {} {}`, + Args: `"one", "two", "three"`, + Error: "single '}' encountered in format string", + }, + "Dangling opening delimiter at end of string": { + FormatStr: `{} {} {}{`, + Args: `"one", "two", "three"`, + Error: "single '{' encountered in format string", + }, + "Dangling closing delimiter at start of string": { + FormatStr: `}{} {} {}`, + Args: `"one", "two", "three"`, + Error: "single '}' encountered in format string", + }, + "Dangling closing delimiter at end of string": { + FormatStr: `{} {} {}}`, + Args: `"one", "two", "three"`, + Error: "single '}' encountered in format string", + }, + }, + "Reference by order": { + "Equal number of fields and arguments": { + FormatStr: `{} {} {}`, + Args: `"one", "two", "three"`, + Expected: "one two three", + }, + "Fewer fields than arguments": { + FormatStr: `{} {}`, + Args: `"one", "two", "three"`, + Expected: "one two", + }, + "More fields than arguments": { + FormatStr: `{} {} {} {}`, + Args: `"one", "two", "three"`, + Error: "replacement index 3 out of range for positional arguments", + }, + "Named replacement field": { + FormatStr: `{} {two} {}`, + Args: `"one", "two", "three"`, + Error: "cannot switch from automatic field numbering to manual field specification", + }, + }, + "Reference by position": { + "Arguments referenced in order": { + FormatStr: `0={0} 1={1} 2={2}`, + Args: `"zero", "one", "two"`, + Expected: "0=zero 1=one 2=two", + }, + "Arguments referenced out of order": { + FormatStr: `2={2} 0={0} 1={1}`, + Args: `"zero", "one", "two"`, + Expected: "2=two 0=zero 1=one", + }, + "Same argument referenced multiple times": { + FormatStr: `2={2} 0={0} 2={2}`, + Args: `"zero", "one", "two"`, + Expected: "2=two 0=zero 2=two", + }, + "Unreferenced arguments": { + FormatStr: `0={0} 1={1}`, + Args: `"zero", "one", "two"`, + Expected: "0=zero 1=one", + }, + // The empty field must occur after the first field, otherwise strFormat will initially (and correctly) interpret the + // format string as reference-by-order and produce a different error when parsing the second field. + "Empty field name after first field": { + FormatStr: `1={1} 2={} 0={0}`, + Args: `"zero", "one", "two"`, + Error: "cannot switch from automatic field numbering to manual field specification", + }, + "Non-numerical field name": { + FormatStr: `1={1} 2={two} 0={0}`, + Args: `"zero", "one", "two"`, + Error: "must use numerical replacement fields with positional arguments", + }, + "Non-existent argument": { + FormatStr: `1={1} 2={2} 3={3} 0={0}`, + Args: `"zero", "one", "two"`, + Error: "replacement index 3 out of range for positional arguments", + }, + }, + "Reference by name": { + "Arguments referenced in order": { + FormatStr: `0={zero} 1={one} 2={two}`, + Args: `zero="a", one="b", two="c"`, + Expected: "0=a 1=b 2=c", + }, + "Arguments referenced out of order": { + FormatStr: `2={two} 0={zero} 1={one}`, + Args: `zero="a", one="b", two="c"`, + Expected: "2=c 0=a 1=b", + }, + "Same argument referenced multiple times": { + FormatStr: `1={one} 0={zero} 1={one}`, + Args: `zero="a", one="b"`, + Expected: "1=b 0=a 1=b", + }, + "Unreferenced arguments": { + FormatStr: `0={zero} 1={one}`, + Args: `zero="a", one="b", two="c"`, + Expected: "0=a 1=b", + }, + "Syntactically invalid field name": { + FormatStr: `1={one} 2={two!} 0={zero}`, + Args: `zero="a", one="b", two="c"`, + Error: "unspecified keyword argument 'two!'", + }, + "Empty field name": { + FormatStr: `1={one} 2={} 0={zero}`, + Args: `zero="a", one="b", two="c"`, + Error: "must use named replacement fields with keyword arguments", + }, + "Numerical field name": { + FormatStr: `1={one} 2={2} 0={zero}`, + Args: `zero="a", one="b", two="c"`, + Error: "unspecified keyword argument '2'", + }, + "Non-existent argument": { + FormatStr: `1={one} 2={two} 3={three} 0={zero}`, + Args: `zero="a", one="b", two="c"`, + Error: "unspecified keyword argument 'three'", + }, + }, + } + + for category, ct := range tests { + t.Run(category, func(t *testing.T) { + for desc, test := range ct { + t.Run(desc, func(t *testing.T) { + s, err := parseString(fmt.Sprintf(`ret = "%s".format(%s)`, test.FormatStr, test.Args)) + if test.Error == "" { + assert.NotNil(t, s) + assert.NoError(t, err) + assert.EqualValues(t, pyString(test.Expected), s.Lookup("ret")) + } else { + assert.ErrorContains(t, err, test.Error) + } + }) + } + }) + } +} + func TestInterpreterSubincludeConfig(t *testing.T) { s, err := parseFile("src/parse/asp/test_data/interpreter/partition.build") assert.NoError(t, err) @@ -422,7 +627,7 @@ func TestRemoveAffixes(t *testing.T) { func TestSubrepoName(t *testing.T) { pkg := core.NewPackage("test/pkg") pkg.SubrepoName = "pleasings" - s, _, err := parseFileToStatementsInPkg("src/parse/asp/test_data/interpreter/subrepo_name.build", pkg) + s, _, err := parseFileToStatementsInPkg(nil, "src/parse/asp/test_data/interpreter/subrepo_name.build", pkg) assert.NoError(t, err) assert.EqualValues(t, "pleasings", s.Lookup("subrepo")) @@ -460,7 +665,7 @@ func TestFloorDivide(t *testing.T) { } func TestFStringOptimisation(t *testing.T) { - s, stmts, err := parseFileToStatements("src/parse/asp/test_data/interpreter/fstring_optimisation.build") + s, stmts, err := parseFileToStatements(nil, "src/parse/asp/test_data/interpreter/fstring_optimisation.build") require.NoError(t, err) assert.EqualValues(t, s.Lookup("x"), "test") // Check that it's been optimised to something