Commit 5f349c2f authored by lukegb's avatar lukegb
Browse files

tokend: init

tokend is responsible for issuing service-scoped tokens based on the token held
and generated by the Vault Agent.

It can also generate "server-user" scoped tokens, which exist for convenience's
sake: they are not a strong attestation of the user on the machine, and have
limited privileges compared to a Vault token issued using e.g. `vault login
-method=oidc`.
parent 9c51da708984
Pipeline #1347 passed with stages
in 22 seconds
......@@ -11,4 +11,5 @@
nhsenglandtests = import ./nhsenglandtests args;
journal2clickhouse = import ./journal2clickhouse args;
secretsmgr = import ./secretsmgr args;
tokend = import ./tokend args;
}
......@@ -31,6 +31,9 @@
)
var (
vaultAddress = flag.String("vault_address", "https://vault.int.lukegb.com", "Address of Vault")
vaultAgentAddress = flag.String("vault_agent_address", "unix:///run/vault-agent/sock", "Address of Vault agent")
signSSHHostKeys = flag.Bool("sign_ssh_host_keys", true, "Sign SSH host keys with CA")
sshHostKeyCAPath = flag.String("ssh_host_key_ca_path", "ssh-host", "Path that the SSH CA is mounted at")
sshHostKeyRole = flag.String("ssh_host_key_role", hostname(), "Role to use for signing SSH host keys")
......@@ -609,7 +612,7 @@
cfg := vapi.DefaultConfig()
cfg.Address = "https://vault.int.lukegb.com"
cfg.AgentAddress = "http://localhost:8200"
cfg.AgentAddress = "unix:///run/vault-agent/sock"
cfg.MaxRetries = 0
cfg.Timeout = 15 * time.Minute
c, err := vapi.NewClient(cfg)
......
# SPDX-FileCopyrightText: 2022 Luke Granger-Brown <depot@lukegb.com>
#
# SPDX-License-Identifier: Apache-2.0
{ depot, ... }:
depot.third_party.buildGo.program {
name = "tokend";
srcs = [
./tokend.go
./tokencache.go
./vaultissuer.go
];
deps = with depot.third_party; [
gopkgs."github.com".golang.glog
gopkgs."github.com".hashicorp.vault.api
];
}
package main
import (
"context"
"os/user"
"sync"
"time"
log "github.com/golang/glog"
vapi "github.com/hashicorp/vault/api"
)
type TokenSecret = vapi.Secret
type ttledSecret struct {
*TokenSecret
expiration time.Time
isRenewable bool
renewThreshold time.Time
}
func wrapToken(s *TokenSecret) ttledSecret {
now := time.Now()
ttl := s.Auth.LeaseDuration
renewable := s.Auth.Renewable
var ttlBuffer time.Duration
if ttl < 300 {
// Give ourselves an extra two minute buffer for renewal.
ttlBuffer = 2 * time.Minute
}
return ttledSecret{
TokenSecret: s,
expiration: now.Add(time.Duration(ttl) * time.Second),
isRenewable: renewable,
renewThreshold: now.Add(time.Duration(ttl/2) * time.Second).Add(-ttlBuffer),
}
}
func (s ttledSecret) Expired() bool {
// We use !After rather than Before so that if it's _exactly_ now then we still return true.
return !s.expiration.After(time.Now())
}
func (s ttledSecret) ShouldRenew() bool {
if !s.isRenewable {
return false
}
return time.Now().After(s.renewThreshold)
}
type tokenInteractor interface {
Revoke(ctx context.Context, tokenSecret *TokenSecret) error
Issue(ctx context.Context, username string, isPlainUser bool) (*TokenSecret, error)
Renew(ctx context.Context, tokenSecret *TokenSecret) (*TokenSecret, error)
}
type tokenUserCache struct {
l sync.RWMutex
m map[string]ttledSecret
i tokenInteractor
}
func newCache(i tokenInteractor) *tokenUserCache {
return &tokenUserCache{
m: make(map[string]ttledSecret),
i: i,
}
}
func (c *tokenUserCache) expire() (revoke map[string]*TokenSecret) {
c.l.Lock()
defer c.l.Unlock()
var remove []string
revoke = make(map[string]*TokenSecret)
for username, s := range c.m {
// If the token has expired, remove it.
if s.Expired() {
remove = append(remove, username)
continue
}
// If the user no longer exists on the system, revoke it.
if _, err := user.Lookup(username); err != nil {
log.Infof("token for %v will be revoked because user lookup returned error: %v", username, err)
remove = append(remove, username)
revoke[username] = s.TokenSecret
continue
}
// Otherwise, leave it alone.
}
for _, username := range remove {
delete(c.m, username)
}
return revoke
}
func (c *tokenUserCache) renew() {
ctx := context.Background()
toRenew := make(map[string]*TokenSecret)
c.l.Lock()
for username, s := range c.m {
if s.ShouldRenew() {
toRenew[username] = s.TokenSecret
}
}
c.l.Unlock()
for username, s := range toRenew {
log.Infof("renewing token for %v", username)
newS, err := c.i.Renew(ctx, s)
if err != nil {
log.Errorf("renewing token for %v: %w (discarding cached token)", username, err)
// Discard the token in cache.
c.l.Lock()
delete(c.m, username)
c.l.Unlock()
continue
}
c.l.Lock()
if oldS, ok := c.m[username]; ok && oldS.TokenSecret == s {
c.m[username] = wrapToken(newS)
} else if ok {
log.Warningf("after renewing token for %v discovered that the token in cache had changed in the meantime; dropping refreshed token")
}
c.l.Unlock()
}
}
func (c *tokenUserCache) tick(ctx context.Context) error {
log.Info("token cache is ticking")
revoke := c.expire()
for username, tokenSecret := range revoke {
if err := c.i.Revoke(ctx, tokenSecret); err != nil {
log.Errorf("unable to revoke token for %v: %w", username, err)
}
}
c.renew()
return nil
}
func (c *tokenUserCache) Get(ctx context.Context, username string, isPlainUser bool) (*TokenSecret, error) {
c.l.RLock()
token, ok := c.m[username]
c.l.RUnlock()
// If we got a token, but it's expired, delete it and pretend it didn't exist.
if ok && token.Expired() {
c.l.Lock()
delete(c.m, username)
c.l.Unlock()
ok = false
}
if ok {
// We have a non-expired pre-existing token!
return token.TokenSecret, nil
}
// Issue a new token.
issuedToken, err := c.i.Issue(ctx, username, isPlainUser)
if err != nil {
return nil, err
}
// OK, now we check if someone has issued a token for us in the
// meantime... We could (but don't) do any more complex coordination
// than this, so we might actually issue two tokens for the same user
// concurrently. Even if we do this though, one of the tokens will
// "win" and we'll revoke the other one.
c.l.Lock()
defer c.l.Unlock()
if bgToken, ok := c.m[username]; ok && !bgToken.Expired() {
// Just use this one, I guess.
go c.i.Revoke(context.Background(), issuedToken)
return bgToken.TokenSecret, nil
}
token = wrapToken(issuedToken)
c.m[username] = token
return token.TokenSecret, nil
}
func (c *tokenUserCache) Purge(ctx context.Context, username string) {
c.l.Lock()
token, ok := c.m[username]
delete(c.m, username)
c.l.Unlock()
if !ok {
return
}
// Revoke the token as well, for good measure, in case the parent hasn't already done that.
c.i.Revoke(ctx, token.TokenSecret)
}
package main
import (
"context"
"encoding/json"
"flag"
"fmt"
"io"
"net"
"net/http"
"os"
"os/user"
"strconv"
"strings"
"sync"
"time"
"golang.org/x/sys/unix"
log "github.com/golang/glog"
vapi "github.com/hashicorp/vault/api"
)
var (
listenPath = flag.String("listen_path", "/run/tokend/sock", "Path to listen for connections to tokend.")
agentAddr = flag.String("agent_address", "unix:///run/vault-agent/sock", "Address of vault agent.")
cacheTickInterval = flag.Duration("cache_tick_interval", 1*time.Minute, "Time between checking for expirations.")
userGroup = flag.String("user_group", "users", "Name of a group that indicates that the requesting user is a 'user' and not a service.")
)
type userContextKeyType struct{}
var userContextKey = userContextKeyType{}
type userData struct {
Username string
IsPlainUser bool
}
type vaultProxier struct {
v *vapi.Client
c *tokenUserCache
hc *http.Client
}
func shouldAttachToken(path string) bool {
if path == "/v1/auth/token/revoke-self" {
return false
}
return true
}
func shouldObfuscateTokenResponse(r *http.Request, resp *http.Response, attachedToken bool) bool {
path := r.URL.Path
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
return false
}
return attachedToken && (path == "/v1/auth/token/lookup-self" || path == "/v1/auth/token/renew-self")
}
func obfuscateAndCopyResponse(w io.Writer, r io.Reader) error {
sec, err := vapi.ParseSecret(r)
if err != nil {
return err
}
delete(sec.Data, "id")
delete(sec.Data, "accessor")
if sec.Auth != nil {
sec.Auth.ClientToken = ""
sec.Auth.Accessor = ""
}
return json.NewEncoder(w).Encode(sec)
}
func (vp *vaultProxier) ServeHTTP(rw http.ResponseWriter, r *http.Request) {
ctx := r.Context()
u, ok := ctx.Value(userContextKey).(*userData)
if !ok {
http.Error(rw, "no username could be determined from the request", http.StatusInternalServerError)
return
}
t, err := vp.c.Get(ctx, u.Username, u.IsPlainUser)
if err != nil {
http.Error(rw, fmt.Sprintf("fetching token for %s: %v", u.Username, err), http.StatusInternalServerError)
return
}
tokenStr, err := t.TokenID()
if err != nil {
http.Error(rw, fmt.Sprintf("extracting token string for %s: %v", u.Username, err), http.StatusInternalServerError)
return
}
outReq := r.Clone(ctx)
outReq.RequestURI = ""
outReq.URL.Scheme = "http"
outReq.URL.Host = "vault-agent"
outReq.Trailer = nil
attachedToken := false
if shouldAttachToken(r.URL.Path) {
outReq.Header.Set("X-Vault-Token", tokenStr)
attachedToken = true
}
log.Infof("incoming request [%v / isPlainUser=%v] %v %v", u.Username, u.IsPlainUser, r.Method, r.URL.Path)
start := time.Now()
resp, err := vp.hc.Do(outReq)
if err != nil {
http.Error(rw, fmt.Sprintf("making backend request to vault agent: %v", err), http.StatusInternalServerError)
return
}
defer resp.Body.Close()
log.Infof("outgoing response [%v / isPlainUser=%v] %v %v: %v %v", u.Username, u.IsPlainUser, r.Method, r.URL.Path, resp.StatusCode, time.Now().Sub(start))
for k, vs := range resp.Header {
rw.Header()[k] = vs
}
if shouldObfuscateTokenResponse(r, resp, attachedToken) {
rw.Header().Del("Content-Length")
rw.WriteHeader(resp.StatusCode)
if err := obfuscateAndCopyResponse(rw, resp.Body); err != nil {
log.Errorf("copying obfuscated lookup-self response: %v", err)
}
} else {
rw.WriteHeader(resp.StatusCode)
if _, err := io.Copy(rw, resp.Body); err != nil {
log.Errorf("copying response from agent: %v", err)
}
}
}
var (
userGroupGidSaved string
userGroupGidOnce sync.Once
)
func userGroupGid() string {
userGroupGidOnce.Do(func() {
userGroupGidSaved = ""
if *userGroup == "" {
// Disabled.
return
}
g, err := user.LookupGroup(*userGroup)
if err != nil {
log.Errorf("looking up user group %q: %v", *userGroup, err)
return
}
userGroupGidSaved = g.Gid
})
return userGroupGidSaved
}
func attachUserData(ctx context.Context, c net.Conn) context.Context {
uc, ok := c.(*net.UnixConn)
if !ok {
log.Warningf("asked to attachUserData to a non UnixConn (%T)", c)
return ctx
}
raw, err := uc.SyscallConn()
if err != nil {
log.Warningf("unable to get the underlying raw connection for UnixConn")
return ctx
}
var cred *unix.Ucred
if ctrlErr := raw.Control(func(fd uintptr) {
cred, err = unix.GetsockoptUcred(int(fd), unix.SOL_SOCKET, unix.SO_PEERCRED)
}); ctrlErr != nil {
log.Warningf("control operation to get username failed: %v", err)
return ctx
} else if err != nil {
log.Warningf("getsockoptucred failed: %v", err)
return ctx
}
u, err := user.LookupId(strconv.FormatUint(uint64(cred.Uid), 10))
if err != nil {
log.Warningf("looking up UID %d from unix socket: %v", cred.Uid, err)
return ctx
}
isPlainUser := false
ugs, err := u.GroupIds()
if err != nil {
log.Warningf("looking up groups for user %s: %v", u.Username)
} else if u.Username == "root" {
// We treat root as a plain user for convenience's sake.
isPlainUser = true
} else {
plainUserGid := userGroupGid()
for _, ug := range ugs {
if ug == plainUserGid {
isPlainUser = true
break
}
}
}
return context.WithValue(ctx, userContextKey, &userData{
Username: u.Username,
IsPlainUser: isPlainUser,
})
}
func main() {
flag.Parse()
vcfg := vapi.DefaultConfig()
vcfg.AgentAddress = *agentAddr
v, err := vapi.NewClient(vcfg)
if err != nil {
log.Exitf("creating vault client against %v: %v", *agentAddr, err)
}
ctx := context.Background()
c := newCache(&vaultTokenInteractor{v})
go func() {
t := time.NewTicker(*cacheTickInterval)
defer t.Stop()
for {
select {
case <-ctx.Done():
return
case <-t.C:
if err := c.tick(ctx); err != nil {
log.Errorf("ticking the cache: %v", err)
}
}
}
}()
d := &net.Dialer{
Timeout: 30 * time.Second,
KeepAlive: 30 * time.Second,
}
agentPath := strings.TrimPrefix(*agentAddr, "unix://")
vp := &vaultProxier{v: v, c: c, hc: &http.Client{
Transport: &http.Transport{
DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
// Ignore what they want.
return d.DialContext(ctx, "unix", agentPath)
},
ForceAttemptHTTP2: true,
MaxIdleConns: 100,
IdleConnTimeout: 90 * time.Second,
TLSHandshakeTimeout: 10 * time.Second,
ExpectContinueTimeout: 1 * time.Second,
},
}}
// Just try to delete the listen path before we start listening.
os.Remove(*listenPath)
listener, err := net.Listen("unix", *listenPath)
if err != nil {
log.Exitf("listening on %v: %v", *listenPath, err)
}
if err := os.Chmod(*listenPath, 0777); err != nil {
log.Exitf("chmodding our unix socket at %v: %v", *listenPath, err)
}
m := http.NewServeMux()
m.Handle("/", vp)
m.HandleFunc("/tokend/cachez", func(rw http.ResponseWriter, r *http.Request) {
c.l.RLock()
for username, token := range c.m {
accessor, _ := token.TokenSecret.TokenAccessor()
log.Infof("cachez: %v: accessor= %v ; expiration=%v, renewal at=%v", username, accessor, token.expiration, token.renewThreshold)
}
c.l.RUnlock()
rw.Header().Set("Content-type", "text/plain")
fmt.Fprintf(rw, "written to log\n")
})
log.Infof("listening on %v", *listenPath)
server := &http.Server{Handler: m, ConnContext: attachUserData}
log.Exit(server.Serve(listener))
}
package main
import (
"context"
"flag"
"fmt"
"regexp"
"time"
log "github.com/golang/glog"
vapi "github.com/hashicorp/vault/api"
)
var (
vaultTokenTTL = flag.String("vault_token_ttl", "20m", "TTL at which token must be renewed.")
vaultTokenMaxTTL = flag.String("vault_token_max_ttl", "1d", "TTL at which token must be reissued.")
vaultTokenRenewable = flag.Bool("vault_token_renewable", true, "Whether the tokens are renewable or reissued every time.")
)
type vaultTokenInteractor struct {
v *vapi.Client
}
var _ tokenInteractor = ((*vaultTokenInteractor)(nil))
func (i *vaultTokenInteractor) Revoke(ctx context.Context, tokenSecret *TokenSecret) error {
// Use the token, since we have it, to revoke itself.
c, err := i.v.Clone()
if err != nil {
return fmt.Errorf("generating a new Vault client: %w", err)
}
token, err := tokenSecret.TokenID()
if err != nil {
return fmt.Errorf("getting token from secret: %w", err)
}
c.SetToken(token)
// Since we have the token, we could potentially use /revoke, or even /revoke-self.
if err := c.Auth().Token().RevokeSelf(""); err != nil {
return fmt.Errorf("revoking token: %w", err)
}
return nil
}
func computePolicies(selfPolicies []string, username string, isPlainUser bool) []string {
appMatchRE := regexp.MustCompile(fmt.Sprintf(`^(server/[^/]+/)?app/%s$`, regexp.QuoteMeta(username)))
userMatchRE := regexp.MustCompile(fmt.Sprintf(`^(server/[^/]+/user|server-user)(/%s)?$`, regexp.QuoteMeta(username)))
var outPolicies []string
for _, p := range selfPolicies {
if p == "default" || appMatchRE.MatchString(p) || (isPlainUser && userMatchRE.MatchString(p)) {
outPolicies = append(outPolicies, p)
}
}
return outPolicies
}
func policiesForToken(ts *TokenSecret) ([]string, error) {
var ps []string
psIntf, ok := ts.Data["policies"].([]interface{})
if !ok {