-
Notifications
You must be signed in to change notification settings - Fork 212
Make str.format more Pythonic
#3412
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -25,6 +25,24 @@ import ( | |||||
| // A nativeFunc is a function that implements a builtin function natively. | ||||||
| type nativeFunc func(*scope, []pyObject) pyObject | ||||||
|
|
||||||
| // A strFormatRefMode is a mode by which the replacement fields in a str.format format string may refer to the remaining | ||||||
| // arguments passed to str.format. | ||||||
| type strFormatRefMode int | ||||||
|
|
||||||
| const ( | ||||||
| // strFormatByOrder indicates that replacement fields refer to positional arguments in the order in which they were | ||||||
| // given to str.format. Fields are numbered automatically; the field names themselves are therefore empty (i.e. `{}`). | ||||||
| strFormatByOrder strFormatRefMode = iota | ||||||
|
|
||||||
| // strFormatByPosition indicates that replacement fields refer to positional arguments in an arbitrary order. Field | ||||||
| // names are numerical and zero-indexed (e.g. `{0}`, `{1}`). | ||||||
| strFormatByPosition | ||||||
|
|
||||||
| // strFormatByName indicates that replacement fields refer to the names of keyword arguments. Field names are therefore | ||||||
| // function parameters (e.g. `{name}`, `{id}`). | ||||||
| strFormatByName | ||||||
| ) | ||||||
|
|
||||||
| // registerBuiltins sets up the "special" builtins that map to native code. | ||||||
| func registerBuiltins(s *scope) { | ||||||
| const varargs = true | ||||||
|
|
@@ -582,48 +600,97 @@ func strRFind(s *scope, args []pyObject) pyObject { | |||||
| return newPyInt(strings.LastIndex(string(self), string(needle))) | ||||||
| } | ||||||
|
|
||||||
| // strFormat implements the str.format function. It interpolates a format string using the arguments passed to | ||||||
| // str.format and returns the interpolated string. | ||||||
| // | ||||||
| // Format strings may contain literal text or replacement fields delimited by `{` and `}` characters. The string between | ||||||
| // these delimiters - the "field name" - denotes which argument to str.format should be substituted in place of the | ||||||
| // field. strFormat supports three reference modes within field names - see [strFormatByOrder], [strFormatByPosition] | ||||||
| // and [strFormatByName]. Only one reference mode may be used per format string. `{{` and `}}` are treated as the | ||||||
| // literal characters `{` and `}` respectively. | ||||||
| func strFormat(s *scope, args []pyObject) pyObject { | ||||||
| self := string(args[0].(pyString)) | ||||||
| var buf strings.Builder | ||||||
| buf.Grow(len(self)) // most reasonable guess available as to how big it might be | ||||||
| buf.Grow(len(self)) // This is our best possible guess as to how big the output might be. | ||||||
|
|
||||||
| // Decide initially whether to parse replacement field names as numbers (which is only permitted if str.format is | ||||||
| // called with positional arguments) or as names (which is only permitted if it is called with keyword arguments). | ||||||
| // | ||||||
| // Although we also support automatic field numbering, both that and numerical replacement fields require str.format to | ||||||
| // be called with positional arguments, so it is impossible to tell whether we should use automatic field numbering | ||||||
| // until we parse the first field name in the format string. args will always contain at least one element (the format | ||||||
| // string). For now, assume we're using numerical replacement fields if it contains more than one element; otherwise, | ||||||
| // assume we're using named replacement fields. | ||||||
| mode := strFormatByPosition | ||||||
| if len(args) == 1 { | ||||||
| mode = strFormatByName | ||||||
| } | ||||||
|
|
||||||
| posArgs := len(args) - 1 | ||||||
| nextPosIndex := 0 | ||||||
|
|
||||||
| arg := 1 // what arg index are we up to for positional args | ||||||
| // This loop consumes the format string from left to right: | ||||||
| for { | ||||||
| start := strings.IndexByte(self, '{') | ||||||
| // Locate the first (opening or closing) delimiter in the remainder of the format string. | ||||||
| start := strings.IndexAny(self, "{}") | ||||||
| // If there are no more delimiters remaining in the format string, output the rest of the format string and end | ||||||
| // processing of the format string. | ||||||
| if start == -1 { | ||||||
| buf.WriteString(self) | ||||||
| break | ||||||
| } | ||||||
| end := strings.IndexByte(self[start:], '}') | ||||||
| if end == -1 { | ||||||
| // We may want to error here in some future revision | ||||||
| buf.WriteString(self) | ||||||
| break | ||||||
| // If this delimiter is an escape character for a delimiter that immediately follows it, output only one of them and | ||||||
| // return to the start of the loop to continue processing the format string. | ||||||
| if start < len(self)-1 && self[start] == self[start+1] { | ||||||
| buf.WriteString(self[:start+1]) | ||||||
| self = self[start+2:] | ||||||
| continue | ||||||
| } | ||||||
| // We now know we're about to begin parsing a field name, which means this character must be the "{" delimiter. | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is "field name" the right word here (given it may be |
||||||
| s.Assert(self[start] == '{', "single '}' encountered in format string") | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| // Find the corresponding "}" delimiter... | ||||||
| end := strings.IndexByte(self[start+1:], '}') | ||||||
| // ...and if there isn't one, this must be a malformed format string. | ||||||
| s.Assert(end != -1, "single '{' encountered in format string") | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| // Locate the "}" delimiter relative to the start of the format string, rather than to the first "{" delimiter. | ||||||
| end = start + end + 1 | ||||||
| // Output everything in the format string up to, but not including, the first "{" delimiter. | ||||||
| buf.WriteString(self[:start]) | ||||||
| end = start + end | ||||||
| if start > 0 && self[start-1] == '$' { | ||||||
| // Don't interpolate ${X} (but ${{X}} -> ${X}) | ||||||
| if self[start+1] == '{' { | ||||||
| buf.WriteString(self[start+1 : end]) | ||||||
| } else if start == end-1 { | ||||||
| // ${} interpolates as $ + positional arg | ||||||
| s.Assert(arg < len(args), "format string specifies at least %d positional arguments, but only %d were supplied", arg, len(args)-1) | ||||||
| buf.WriteString(args[arg].String()) | ||||||
| arg++ | ||||||
| } else { | ||||||
| buf.WriteString(self[start : end+1]) | ||||||
| } | ||||||
| } else if key := self[start+1 : end]; key == "" { | ||||||
| s.Assert(arg < len(args), "format string specifies at least %d positional arguments, but only %d were supplied", arg, len(args)-1) | ||||||
| buf.WriteString(args[arg].String()) | ||||||
| arg++ | ||||||
| } else if val, present := s.locals[key]; present { | ||||||
| // Extract the replacement field's name, excluding the delimiters. | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| fieldName := self[start+1 : end] | ||||||
| // Now we can decide whether to rely on automatic field numbering. This is a one-time decision that is only taken if | ||||||
| // str.format was called with positional arguments and the first field name we encounter is empty; after this, empty | ||||||
| // field names are otherwise format string errors. | ||||||
| if fieldName == "" && mode == strFormatByPosition && nextPosIndex == 0 { | ||||||
| mode = strFormatByOrder | ||||||
| } | ||||||
| // Decide how to interpret this field name: | ||||||
| switch mode { | ||||||
| case strFormatByOrder: | ||||||
| // With automatic field numbering, output the next positional argument. Field names must be empty, and there cannot | ||||||
| // be more of them than there are positional arguments (although there may ultimately be fewer). Internally, the | ||||||
| // first positional argument is the format string - the positional arguments from the caller's perspective are | ||||||
| // shifted one to the right in args. | ||||||
| s.Assert(fieldName == "", "cannot switch from automatic field numbering to manual field specification") | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am concerned that this error message will be very unclear to somebody who is unfamiliar with how this is implemented - I think a better phrasing would be something like "cannot mix replacement field specification types in one format string", and possibly include examples of the mismatched replacement fields |
||||||
| s.Assert(nextPosIndex < posArgs, "replacement index %d out of range for positional arguments", nextPosIndex) | ||||||
| buf.WriteString(args[nextPosIndex+1].String()) | ||||||
| nextPosIndex++ | ||||||
| case strFormatByPosition: | ||||||
| // With numerical replacement fields, parse the field name as an integer i and output the string value of the | ||||||
| // (zero-indexed) i'th positional argument. Internally, the first positional argument is the format string - the | ||||||
| // positional arguments from the caller's perspective are shifted one to the right in args. | ||||||
| i, err := strconv.Atoi(fieldName) | ||||||
| s.Assert(err == nil, "must use numerical replacement fields with positional arguments") | ||||||
| s.Assert(0 <= i && i < posArgs, "replacement index %d out of range for positional arguments", i) | ||||||
| buf.WriteString(args[i+1].String()) | ||||||
| case strFormatByName: | ||||||
| // With named replacement fields, output the string value of the keyword argument with the given name. | ||||||
| s.Assert(fieldName != "", "must use named replacement fields with keyword arguments") | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this should also be something like "cannot mix replacement field specification types in one format string" |
||||||
| val, exists := s.locals[fieldName] | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. are keyword arguments implemented as locals? |
||||||
| s.Assert(exists, "unspecified keyword argument '%s'", fieldName) | ||||||
| buf.WriteString(val.String()) | ||||||
| } else { | ||||||
| // We may want to error here in some future revision | ||||||
| buf.WriteString(self[start : end+1]) | ||||||
| } | ||||||
| // Advance beyond the closing delimiter for this field, and parse what remains in the format string. | ||||||
| self = self[end+1:] | ||||||
| } | ||||||
| return pyString(buf.String()) | ||||||
|
|
||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -7,12 +7,16 @@ import ( | |
| func BenchmarkStrFormat(b *testing.B) { | ||
| s := &scope{ | ||
| locals: map[string]pyObject{ | ||
| "spam": pyString("abc"), | ||
| "eggs": pyString("def"), | ||
| "one": pyString("123"), | ||
| "two": pyString("456"), | ||
| "spam": pyString("abc"), | ||
| "eggs": pyString("def"), | ||
| "wibble": pyString("ghi"), | ||
| "wobble": pyString("jkl"), | ||
| }, | ||
| } | ||
| args := []pyObject{ | ||
| pyString("test {} {spam} ${wibble} {} {eggs} {wobble}"), pyString("123"), pyString("456"), | ||
| pyString("test {one} {spam} ${wibble} {two} {eggs} {wobble}"), | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Even though we're not asserting it, I think it's worth a comment here to state what we'd expect this to be after applying the formatting |
||
| } | ||
| b.ReportAllocs() | ||
|
|
||
|
|
@@ -52,11 +56,11 @@ export PKG=$PKG | |
| export NAME=$(echo $NAME | sed -E 's/^_(.*)#.*$/\1/') | ||
| EOF | ||
| cat << 'EOF' >> $OUT | ||
| DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" | ||
| export TF_DATA_DIR="/tmp/plz/terraform/${PKG}_${NAME}" | ||
| DIR="$( cd "$( dirname "${{BASH_SOURCE[0]}}" )" >/dev/null 2>&1 && pwd )" | ||
| export TF_DATA_DIR="/tmp/plz/terraform/${{PKG}}_${{NAME}}" | ||
| >&2 echo "-> using $TF_DATA_DIR" | ||
| rm -rf "$TF_DATA_DIR" && mkdir -p "$TF_DATA_DIR" | ||
| trap "rm -rf ${TF_DATA_DIR}" EXIT | ||
| trap "rm -rf ${{TF_DATA_DIR}}" EXIT | ||
| tar xfz $DIR/$(location {tarball_target}) -C $TF_DATA_DIR | ||
| mkdir -p $TF_DATA_DIR/.terraform.d/plugins/linux_amd64 | ||
| {plugins_extract_script} | ||
|
|
@@ -76,9 +80,9 @@ else | |
| fi | ||
| if [ -n "$tfregistry_token" ]; then | ||
| cat <<EOC > $TF_DATA_DIR/.terraformrc | ||
| credentials "terraform.external.thoughtmachine.io" { | ||
| credentials "terraform.external.thoughtmachine.io" {{ | ||
| token = "$tfregistry_token" | ||
| } | ||
| }} | ||
| EOC | ||
| else | ||
| cat <<WARN >&2 | ||
|
|
@@ -91,8 +95,8 @@ else | |
| WARN | ||
| fi | ||
|
|
||
| export TF_CLI_ARGS_plan="${TF_CLI_ARGS_plan:-} -lock-timeout=60s" | ||
| export TF_CLI_ARGS_apply="${TF_CLI_ARGS_apply:-} -lock-timeout=60s" | ||
| export TF_CLI_ARGS_plan="${{TF_CLI_ARGS_plan:-}} -lock-timeout=60s" | ||
| export TF_CLI_ARGS_apply="${{TF_CLI_ARGS_apply:-}} -lock-timeout=60s" | ||
| export CWD=$(pwd) | ||
| TERRAFORM_CLI="$CWD/$(out_location {terraform_cli})" | ||
| var_file_paths=({var_file_paths_csv}) | ||
|
|
@@ -102,7 +106,7 @@ then | |
| TERRAFORM_CLI="$CWD/$(location {terraform_cli})" | ||
| var_file_paths=({var_file_paths_csv_sandbox}) | ||
| data_paths=({data_paths_csv_sandbox}) | ||
| trap 'echo -n $? 1>&3 && rm -rf ${TF_DATA_DIR} && exit 0' EXIT | ||
| trap 'echo -n $? 1>&3 && rm -rf ${{TF_DATA_DIR}} && exit 0' EXIT | ||
| fi | ||
| var_flags="" | ||
| for i in "${{!var_file_paths[@]}}" | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just to check, is this used both for f-strings and explicit calls to
.format()?