From aa57a5150ecae90dce572a5e130b94b29fce7332 Mon Sep 17 00:00:00 2001 From: Ryan Richard Date: Wed, 1 Feb 2023 10:49:38 -0800 Subject: [PATCH] Add package starformer, uses Starlark for user-defined authn filters --- go.mod | 1 + go.sum | 2 + internal/starformer/starformer.go | 183 +++++++++++++ internal/starformer/starformer_test.go | 360 +++++++++++++++++++++++++ 4 files changed, 546 insertions(+) create mode 100644 internal/starformer/starformer.go create mode 100644 internal/starformer/starformer_test.go diff --git a/go.mod b/go.mod index 18a1cb1e..b46654fe 100644 --- a/go.mod +++ b/go.mod @@ -30,6 +30,7 @@ require ( github.com/spf13/pflag v1.0.5 github.com/stretchr/testify v1.8.1 github.com/tdewolff/minify/v2 v2.12.4 + go.starlark.net v0.0.0-20210602144842-1cdb82c9e17a go.uber.org/zap v1.24.0 golang.org/x/crypto v0.5.0 golang.org/x/net v0.5.0 diff --git a/go.sum b/go.sum index 79e74c2f..5e63bc82 100644 --- a/go.sum +++ b/go.sum @@ -613,6 +613,8 @@ go.opentelemetry.io/otel/trace v1.10.0/go.mod h1:Sij3YYczqAdz+EhmGhE6TpTxUO5/F/A go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI= go.opentelemetry.io/proto/otlp v0.19.0 h1:IVN6GR+mhC4s5yfcTbmzHYODqvWAp3ZedA2SJPI1Nnw= go.opentelemetry.io/proto/otlp v0.19.0/go.mod h1:H7XAot3MsfNsj7EXtrA2q5xSNQ10UqI405h3+duxN4U= +go.starlark.net v0.0.0-20210602144842-1cdb82c9e17a h1:wDtSCWGrX9tusypq2Qq9xzaA3Tf/+4D2KaWO+HQvGZE= +go.starlark.net v0.0.0-20210602144842-1cdb82c9e17a/go.mod h1:t3mmBBPzAVvK0L0n1drDmrQsJ8FoIx4INCqVMTr/Zo0= go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/atomic v1.10.0 h1:9qC72Qh0+3MqyJbAn8YU5xVq1frD8bn3JtD2oXtafVQ= diff --git a/internal/starformer/starformer.go b/internal/starformer/starformer.go new file mode 100644 index 00000000..a332b26f --- /dev/null +++ b/internal/starformer/starformer.go @@ -0,0 +1,183 @@ +// Copyright 2021 the Pinniped contributors. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +// Package starformer is an implementation of UpstreamToDownstreamTransformer using Starlark scripts. +// See Starlark dialect language documentation here: https://github.com/google/starlark-go/blob/master/doc/spec.md +// A video introduction to Starlark and how to integrate it into projects is here: https://www.youtube.com/watch?v=9P_YKVhncWI +package starformer + +import ( + "fmt" + + "go.starlark.net/lib/json" + starlarkmath "go.starlark.net/lib/math" + "go.starlark.net/lib/time" + "go.starlark.net/resolve" + "go.starlark.net/starlark" + + "go.pinniped.dev/internal/plog" +) + +const ( + maxExecutionSteps = 10000000 + transformFunctionName = "transform" +) + +// Configure some global variables in starlark-go. +// nolint:gochecknoinits // wish these weren't globals but oh well +func init() { + // Allow the non-standard "set" data structure to be used. + resolve.AllowSet = true + + // Note that we could allow "while" statements and recursive functions, but the language already + // has "for" loops so it seems unnecessary for our use case. This is currently the default + // value in starlark-go but repeating it here as documentation. + resolve.AllowRecursion = false +} + +type Transformer struct { + hook *starlark.Function +} + +// New creates an instance of Transformer. Given some Starlark source code as a string, it loads the code. +// If there is any error during loading, it will return the error. It expects the loaded code to define +// a Starlark function called "transform" which should take two positional arguments. The returned +// Transformer can be safely called from multiple threads simultaneously, no matter how the Starlark +// source code was written, because the Starlark module has been frozen (made immutable). +func New(starlarkSourceCode string) (*Transformer, error) { + // Create a Starlark thread in which the source will be loaded. + thread := &starlark.Thread{ + Name: "starlark script loader", + Print: func(thread *starlark.Thread, msg string) { + // When the script has a top-level print(), send it to the server log. + plog.Debug("debug message while loading starlark transform script", "msg", msg) + }, + Load: func(thread *starlark.Thread, module string) (starlark.StringDict, error) { + // Allow starlark-go's custom built-in modules to be loaded by scripts if they desire. + switch module { + case "json.star": + return starlark.StringDict{"json": json.Module}, nil + case "time.star": + return starlark.StringDict{"time": time.Module}, nil + case "math.star": + return starlark.StringDict{"math": starlarkmath.Module}, nil + default: + // Don't allow any other file to be loaded. + return nil, fmt.Errorf("only the following modules may be loaded: json.star, time.star, math.star") + } + }, + } + + // Prevent the top-level statements of the Starlark script from accidentally running forever. + thread.SetMaxExecutionSteps(maxExecutionSteps) + + // Start with empty predeclared names, aside from the built-ins. + predeclared := starlark.StringDict{} + + // Load a Starlark script. Initialization of a script runs its top-level statements from top to bottom, + // and then "freezes" all of the values making them immutable. The result can be used in multiple threads + // simultaneously without interfering, communicating, or racing with each other. The filename given here + // will appear in some Starlark error messages. + globals, err := starlark.ExecFile(thread, "transform.star", starlarkSourceCode, predeclared) + if err != nil { + return nil, fmt.Errorf("error while loading starlark transform script: %w", err) + } + + // Get the function called "transform" from the global state of the module that was just loaded. + hook, _ := globals[transformFunctionName].(*starlark.Function) + if hook == nil { + return nil, fmt.Errorf("starlark script does not define %q function", transformFunctionName) + } + + // Check that the "transform" function takes the expected number of arguments so we can call it later. + if hook.NumParams() != 2 { + return nil, fmt.Errorf("starlark script's global %q function has %d parameters but should have 2", transformFunctionName, hook.NumParams()) + } + + return &Transformer{hook: hook}, nil +} + +// Transform calls the Starlark "transform" function that was loaded by New. The username and groups params are +// passed into the Starlark function, and the return values of the Starlark function are returned. If there is an error +// during the call to the Starlark function (either a programming error, a runtime error, or an intentional call to +// Starlark's `fail` built-in function) then Transform will return the error. This function is thread-safe. +// The runtime of this function depends on the complexity of the Starlark source code, but for a typical Starlark +// function will be something on the order of 50µs on a modern laptop. +func (t *Transformer) Transform(username string, groups []string) (string, []string, error) { + // TODO: maybe add a context param for cancellation, which is supported in starlark-go by + // calling thread.Cancel() from any goroutine, or maybe this doesn't matter because there is + // already a maxExecutionSteps so scripts are guaranteed to finish within a reasonable time. + + // Create a Starlark thread in which the function will be called. + thread := &starlark.Thread{ + Name: "starlark script executor", + Print: func(thread *starlark.Thread, msg string) { + // When the script's 'transform' function has a print(), send it to the server log. + plog.Debug("debug message while running starlark transform script", "msg", msg) + }, + } + + // Prevent the Starlark function from accidentally running forever. + thread.SetMaxExecutionSteps(maxExecutionSteps) + + // Prepare the function arguments as Starlark values. + groupsTuple := starlark.Tuple{} + for _, group := range groups { + groupsTuple = append(groupsTuple, starlark.String(group)) + } + args := starlark.Tuple{starlark.String(username), groupsTuple} + + // Call the Starlark hook function in the new thread and pass the arguments. + // Get back the function's return value or an error. + hookReturnValue, err := starlark.Call(thread, t.hook, args, nil) + + // Errors could be programming mistakes in the script, or could be an intentional usage of the `fail` built-in. + // Either way, return an error to reject the login. + if err != nil { + return "", nil, fmt.Errorf("error while running starlark %q function: %w", transformFunctionName, err) + } + + // The special Starlark value 'None' is interpreted here as a shortcut to mean make no edits. + if hookReturnValue == starlark.None { + return username, groups, nil + } + + // TODO: maybe offer a way for the user to reject a login with a nice error message which we can distinguish from + // an accidental coding error, for example by returning a single string from their 'transform' function instead + // of a tuple, or by returning a special value that we set up in the module's state in advance like + // `return rejectAuthentication(message)` + + // Otherwise the function should have returned a tuple with two values. + returnedTuple, ok := hookReturnValue.(starlark.Tuple) + if !ok || returnedTuple.Len() != 2 { + return "", nil, fmt.Errorf("expected starlark %q function to return None or a Tuple of length 2", transformFunctionName) + } + + // The first value in the returned tuple is the username. Turn it back into a golang string. + transformedUsername, ok := starlark.AsString(returnedTuple.Index(0)) + if !ok || len(transformedUsername) == 0 { + return "", nil, fmt.Errorf("expected starlark %q function's return tuple to have a non-empty string as the first value", transformFunctionName) + } + + // The second value in the returned tuple is an iterable of group names. + returnedGroups, ok := returnedTuple.Index(1).(starlark.Iterable) + if !ok { + return "", nil, fmt.Errorf("expected starlark %q function's return tuple to have an iterable value as the second value", transformFunctionName) + } + + // Turn the returned iterable of group names back into a golang []string, including turning an empty iterable into an empty slice. + transformedGroupNames := []string{} + groupsIterator := returnedGroups.Iterate() + defer groupsIterator.Done() + var transformedGroup starlark.Value + for groupsIterator.Next(&transformedGroup) { + transformedGroupName, ok := starlark.AsString(transformedGroup) + if !ok || len(transformedGroupName) == 0 { + return "", nil, fmt.Errorf("expected starlark %q function's return tuple's second value to contain only non-empty strings", transformFunctionName) + } + transformedGroupNames = append(transformedGroupNames, transformedGroupName) + } + + // Got username and group names, so return them as the transformed values. + return transformedUsername, transformedGroupNames, nil +} diff --git a/internal/starformer/starformer_test.go b/internal/starformer/starformer_test.go new file mode 100644 index 00000000..d53fdb84 --- /dev/null +++ b/internal/starformer/starformer_test.go @@ -0,0 +1,360 @@ +// Copyright 2021 the Pinniped contributors. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +package starformer + +import ( + "fmt" + "testing" + "time" + + "github.com/stretchr/testify/require" + + "go.pinniped.dev/internal/here" +) + +func TestTypicalPerformance(t *testing.T) { + t.Parallel() + + starformer, err := New(here.Doc(` + def transform(username, groups): + prefixedGroups = [] + for g in groups: + prefixedGroups.append("group_prefix:" + g) + return "username_prefix:" + username, prefixedGroups + `)) + require.NoError(t, err) + require.NotNil(t, starformer) + + groups := []string{} + wantGroups := []string{} + for i := 0; i < 100; i++ { + groups = append(groups, fmt.Sprintf("g%d", i)) + wantGroups = append(wantGroups, fmt.Sprintf("group_prefix:g%d", i)) + } + + // Before looking at performance, check that the behavior of the function is correct. + gotUsername, gotGroups, err := starformer.Transform("ryan", groups) + require.NoError(t, err) + require.Equal(t, "username_prefix:ryan", gotUsername) + require.Equal(t, wantGroups, gotGroups) + // Calling it a second time should give the same results again for the same inputs. + gotUsername, gotGroups, err = starformer.Transform("ryan", groups) + require.NoError(t, err) + require.Equal(t, "username_prefix:ryan", gotUsername) + require.Equal(t, wantGroups, gotGroups) + + // This is meant to give a sense of typical runtime of a Starlark function which transforms + // a username and 100 group names. It is not meant to be a pass/fail test or scientific benchmark test. + iterations := 1000 + start := time.Now() + for i := 0; i < iterations; i++ { + _, _, _ = starformer.Transform("ryan", groups) + } + elapsed := time.Since(start) + t.Logf("TestTypicalPerformance %d iteration of Transform took %s; average runtime %s", iterations, elapsed, elapsed/time.Duration(iterations)) + // On my laptop this prints: TestTypicalPerformance 1000 iteration of Transform took 299.109387ms; average runtime 299.109µs +} + +func TestTransformer(t *testing.T) { + // See Starlark dialect language documentation here: https://github.com/google/starlark-go/blob/master/doc/spec.md + tests := []struct { + name string + starlarkSrc string + username string + groups []string + wantUsername string + wantGroups []string + wantNewErr string + wantTransformErr string + }{ + { + name: "identity function makes no modification", + starlarkSrc: here.Doc(` + def transform(username, groups): + return username, groups + `), + username: "ryan", + groups: []string{"g1", "g2"}, + wantUsername: "ryan", + wantGroups: []string{"g1", "g2"}, + }, + { + name: "returning None is a shortcut for making no modification", + starlarkSrc: here.Doc(` + def transform(username, groups): + return None + `), + username: "ryan", + groups: []string{"g1", "g2"}, + wantUsername: "ryan", + wantGroups: []string{"g1", "g2"}, + }, + { + name: "prefixing the username", + starlarkSrc: here.Doc(` + def transform(username, groups): + return "foobar:" + username, groups + `), + username: "ryan", + groups: []string{"g1", "g2"}, + wantUsername: "foobar:ryan", + wantGroups: []string{"g1", "g2"}, + }, + { + name: "down-casing the username", + starlarkSrc: here.Doc(` + def transform(username, groups): + return username.lower(), groups + `), + username: "RyAn", + groups: []string{"g1", "g2"}, + wantUsername: "ryan", + wantGroups: []string{"g1", "g2"}, + }, + { + name: "removing all groups", + starlarkSrc: here.Doc(` + def transform(username, groups): + return username, () + `), + username: "ryan", + groups: []string{"g1", "g2"}, + wantUsername: "ryan", + wantGroups: []string{}, + }, + { + name: "modifying groups", + starlarkSrc: here.Doc(` + def transform(username, groups): + return username, ("new-g1", "new-g2") + `), + username: "ryan", + groups: []string{"g1", "g2"}, + wantUsername: "ryan", + wantGroups: []string{"new-g1", "new-g2"}, + }, + { + name: "converting the groups param to a list type in the business logic is easy, and returning groups as a list works", + starlarkSrc: here.Doc(` + def transform(username, groups): + groupsList = list(groups) + groupsList.pop() + return username, groupsList + `), + username: "ryan", + groups: []string{"g1", "g3"}, + wantUsername: "ryan", + wantGroups: []string{"g1"}, + }, + { + name: "can print from the script", + starlarkSrc: here.Doc(` + print("this should get logged by Pinniped but it is not asserted here") + def transform(username, groups): + print("this should get logged by Pinniped but it is not asserted here:", username) + return username, groups + `), + username: "ryan", + groups: []string{"g1", "g2"}, + wantUsername: "ryan", + wantGroups: []string{"g1", "g2"}, + }, + { + name: "rejecting a login by raising an error", + starlarkSrc: here.Doc(` + def transform(username, groups): + if username == "ryan": + fail("i don't like the username", username) + else: + return username, groups + `), + username: "ryan", + groups: []string{"g1", "g2"}, + wantTransformErr: `error while running starlark "transform" function: fail: i don't like the username ryan`, + }, + { + name: "using the non-standard 'set' type is allowed", + starlarkSrc: here.Doc(` + def transform(username, groups): + groupsSet = set(groups) + if "g2" in groupsSet: + return username, groups + else: + fail("user", username, "does not belong to group g2") + `), + username: "ryan", + groups: []string{"g1", "g3"}, + wantTransformErr: `error while running starlark "transform" function: fail: user ryan does not belong to group g2`, + }, + { + name: "using the non-standard 'set' type is allowed, and the groups can be returned as a set", + starlarkSrc: here.Doc(` + def transform(username, groups): + groupsSet = set(groups) + groupsSet = groupsSet.union(["g42"]) + if "g2" in groupsSet: + return username, groupsSet + else: + fail("user", username, "does not belong to group g2") + `), + username: "ryan", + groups: []string{"g1", "g2"}, + wantUsername: "ryan", + wantGroups: []string{"g1", "g2", "g42"}, + }, + { + name: "the math module may be loaded", + starlarkSrc: here.Doc(` + load('math.star', 'math') + def transform(username, groups): + if math.round(0.4) == 0.0: + return username, groups + else: + fail("math module is supposed to work") + `), + username: "ryan", + groups: []string{"g1", "g2"}, + wantUsername: "ryan", + wantGroups: []string{"g1", "g2"}, + }, + { + name: "the json module may be loaded", + starlarkSrc: here.Doc(` + load('json.star', 'json') + def transform(username, groups): + return username, [json.encode({"hello": groups[0]})] + `), + username: "ryan", + groups: []string{"g1", "g2"}, + wantUsername: "ryan", + wantGroups: []string{`{"hello":"g1"}`}, + }, + { + name: "the time module may be loaded", + starlarkSrc: here.Doc(` + load('time.star', 'time') + def transform(username, groups): + if time.now() > time.parse_time("2001-01-20T00:00:00Z"): + return "someone", ["g3", "g4"] + fail("huh?") + `), + username: "ryan", + groups: []string{"g1", "g2"}, + wantUsername: "someone", + wantGroups: []string{"g3", "g4"}, + }, + { + name: "loading other modules results in an error", + starlarkSrc: here.Doc(` + load('other.star', 'other') + def transform(username, groups): + return username, groups + `), + username: "ryan", + groups: []string{"g1", "g2"}, + wantNewErr: "error while loading starlark transform script: cannot load other.star: only the following modules may be loaded: json.star, time.star, math.star", + }, + { + name: "unexpected error during loading", + starlarkSrc: here.Doc(` + this is not valid starlark syntax + `), + wantNewErr: "error while loading starlark transform script: transform.star:1:8: got illegal token, want newline", + }, + { + name: "too many execution steps during loading", + starlarkSrc: here.Doc(` + def helper(): + a = 0 + for x in range(1000000): + a += 1 + helper() + `), + wantNewErr: "error while loading starlark transform script: Starlark computation cancelled: too many steps", + }, + { + name: "too many execution steps during transform function", + starlarkSrc: here.Doc(` + def transform(username, groups): + a = 0 + for x in range(1000000): + a += 1 + return username, groups + `), + username: "ryan", + groups: []string{"g1", "g2"}, + wantTransformErr: `error while running starlark "transform" function: Starlark computation cancelled: too many steps`, + }, + { + name: "returning the wrong data type", + starlarkSrc: here.Doc(` + def transform(username, groups): + return 42 + `), + username: "ryan", + groups: []string{"g1", "g2"}, + wantTransformErr: `expected starlark "transform" function to return None or a Tuple of length 2`, + }, + { + name: "returning the wrong data type inside the groups iterable return value", + starlarkSrc: here.Doc(` + def transform(username, groups): + return username, ("g1", 42) + `), + username: "ryan", + groups: []string{"g1", "g2"}, + wantTransformErr: `expected starlark "transform" function's return tuple's second value to contain only non-empty strings`, + }, + { + name: "returning an empty string inside the groups iterable return value", + starlarkSrc: here.Doc(` + def transform(username, groups): + return username, ("g1", "", "g2") + `), + username: "ryan", + groups: []string{"g1", "g2"}, + wantTransformErr: `expected starlark "transform" function's return tuple's second value to contain only non-empty strings`, + }, + { + name: "no transform function defined", + starlarkSrc: here.Doc(` + def otherFunction(username, groups): + return None + `), + wantNewErr: `starlark script does not define "transform" function`, + }, + { + name: "transform function defined with wrong number of positional parameters", + starlarkSrc: here.Doc(` + def transform(username): + return None + `), + wantNewErr: `starlark script's global "transform" function has 1 parameters but should have 2`, + }, + } + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + starformer, err := New(tt.starlarkSrc) + if tt.wantNewErr != "" { + require.EqualError(t, err, tt.wantNewErr) + require.Nil(t, starformer) + return // wanted an error from New, so don't keep going + } + require.NoError(t, err) + require.NotNil(t, starformer) + + gotUsername, gotGroups, err := starformer.Transform(tt.username, tt.groups) + if tt.wantTransformErr != "" { + require.EqualError(t, err, tt.wantTransformErr) + } else { + require.NoError(t, err) + } + require.Equal(t, tt.wantUsername, gotUsername) + require.Equal(t, tt.wantGroups, gotGroups) + }) + } +}