diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md index 67893d82d8..435bec1ca6 100644 --- a/NEXT_CHANGELOG.md +++ b/NEXT_CHANGELOG.md @@ -4,6 +4,7 @@ ### CLI +* `auth login` no longer falls back to plaintext when the OS keyring is reachable but locked. The unlock prompt shown by the probe now runs in parallel with the OAuth flow, and the token is stored in the keyring once the user has typed their password. * `databricks auth describe` now reports where U2M (`databricks-cli`) tokens are stored: `plaintext` (`~/.databricks/token-cache.json`) or `secure` (OS keyring), and the source of the choice (env var, config setting, or default). * Marked the default profile in the interactive pickers shown by `databricks auth switch`, `databricks auth logout`, `databricks auth token`, and `databricks auth login`, and moved it to the top of the list. `databricks auth login` and `databricks auth logout` now offer the same selectors as `databricks auth token` and `databricks auth switch` respectively. diff --git a/cmd/auth/login.go b/cmd/auth/login.go index 03cd9859f8..cd0836c835 100644 --- a/cmd/auth/login.go +++ b/cmd/auth/login.go @@ -146,10 +146,10 @@ a new profile is created. ctx := cmd.Context() profileName := cmd.Flag("profile").Value.String() - // Resolve the cache before the browser step so a missing/locked keyring - // surfaces here rather than after the user completes OAuth. When secure - // is selected but the keyring is unreachable, this silently falls back - // to plaintext and persists auth_storage = plaintext for next time. + // Resolve the cache before the browser step so an unavailable + // keyring surfaces here rather than after OAuth. The probe also + // triggers the OS unlock prompt, which the user can answer during + // OAuth. tokenCache, mode, err := storage.ResolveCacheForLogin(ctx, "") if err != nil { return err diff --git a/libs/auth/storage/cache.go b/libs/auth/storage/cache.go index cfd204fa32..d3a5fa85d2 100644 --- a/libs/auth/storage/cache.go +++ b/libs/auth/storage/cache.go @@ -2,6 +2,7 @@ package storage import ( "context" + "errors" "fmt" "github.com/databricks/cli/libs/databrickscfg" @@ -54,11 +55,13 @@ func ResolveCache(ctx context.Context, override StorageMode) (cache.TokenCache, // 2. When the user explicitly asked for secure (override, env var, or // config) but the keyring is unreachable, return an error. An explicit // "I want secure" is honored strictly: never silently downgrade. +// 3. When the probe times out, stay on keyring regardless of explicit. +// The timeout is ambiguous (locked vs hung); a misdiagnosis fails +// the final Store rather than silently downgrading to plaintext. // -// Both rules are dormant today: the resolver default is plaintext, so +// Rules 1 and 2 are dormant today: the resolver default is plaintext, so // (mode=Secure, explicit=false) is unreachable. They activate when the -// default flips to secure (MS4 / cli-ga). Wiring lands now so MS4 is a -// single-line default flip plus pin-on-success additions. +// default flips to secure at GA. // // Login-specific. Read paths (auth token, bundle commands) keep the original // keyring error so they don't silently mint plaintext copies of tokens that @@ -120,7 +123,7 @@ func resolveCacheForLoginWith(ctx context.Context, override StorageMode, f cache // // Pin-on-success across modes (locking in the first working behavior to // insulate users from keyring flakiness) is intentionally not implemented -// here. It lands with MS4 alongside the default flip; pinning before the +// here. It lands at GA alongside the default flip; pinning before the // flip would freeze every default user into plaintext and make the flip a // no-op for them. func applyLoginFallback(ctx context.Context, mode StorageMode, explicit bool, f cacheFactories) (cache.TokenCache, StorageMode, error) { @@ -133,6 +136,15 @@ func applyLoginFallback(ctx context.Context, mode StorageMode, explicit bool, f return c, mode, nil case StorageModeSecure: if probeErr := f.probeKeyring(); probeErr != nil { + // Stay on keyring on timeout: a locked keyring being unlocked + // during OAuth is the common case, and a misdiagnosed hang + // fails the final Store anyway, which is better than a + // silent plaintext downgrade. + var timeoutErr *TimeoutError + if errors.As(probeErr, &timeoutErr) { + log.Debugf(ctx, "keyring probe timed out (%v); staying on keyring", probeErr) + return f.newKeyring(), mode, nil + } if explicit { return nil, "", fmt.Errorf("secure storage was requested but the OS keyring is not reachable: %w", probeErr) } @@ -159,7 +171,7 @@ func applyLoginFallback(ctx context.Context, mode StorageMode, explicit bool, f // Only called on the (mode=Secure, explicit=false) probe-failure branch. That // branch is unreachable today (resolver default is plaintext), so this is // dormant infrastructure: it activates when the default flips to secure -// (MS4) and lets default-on-broken-keyring users avoid a 3s probe on every +// at GA and lets default-on-broken-keyring users avoid a 3s probe on every // command. func persistPlaintextFallback(ctx context.Context) error { configPath := env.Get(ctx, "DATABRICKS_CONFIG_FILE") diff --git a/libs/auth/storage/cache_test.go b/libs/auth/storage/cache_test.go index 34d3f38c13..db059299dc 100644 --- a/libs/auth/storage/cache_test.go +++ b/libs/auth/storage/cache_test.go @@ -3,6 +3,7 @@ package storage import ( "context" "errors" + "fmt" "os" "path/filepath" "testing" @@ -232,6 +233,45 @@ func TestApplyLoginFallback_ExplicitSecure_ProbeFail_Errors(t *testing.T) { assert.Equal(t, "", persisted, "explicit-secure error must not write config") } +// A locked keyring with a slow user surfaces as TimeoutError. We want login +// to stay on the keyring so the final Store lands there once the user has +// finished unlocking, regardless of whether secure was explicit. Cover both +// the bare TimeoutError (in case probe wraps thinner in the future) and the +// real wrapped form returned by probeWithBackend. +func TestApplyLoginFallback_ProbeTimeout_StaysOnKeyring(t *testing.T) { + cases := []struct { + name string + explicit bool + probeErr error + }{ + {"default-secure, bare TimeoutError", false, &TimeoutError{Op: "set"}}, + {"default-secure, wrapped TimeoutError", false, fmt.Errorf("write: %w", &TimeoutError{Op: "set"})}, + {"explicit-secure, bare TimeoutError", true, &TimeoutError{Op: "set"}}, + {"explicit-secure, wrapped TimeoutError", true, fmt.Errorf("write: %w", &TimeoutError{Op: "set"})}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + hermetic(t) + ctx := t.Context() + configPath := env.Get(ctx, "DATABRICKS_CONFIG_FILE") + + f := fakeFactories(t) + f.probeKeyring = func() error { return tc.probeErr } + + got, mode, err := applyLoginFallback(ctx, StorageModeSecure, tc.explicit, f) + + require.NoError(t, err) + assert.Equal(t, StorageModeSecure, mode) + assert.Equal(t, "keyring", got.(stubCache).source) + + persisted, gerr := databrickscfg.GetConfiguredAuthStorage(ctx, configPath) + require.NoError(t, gerr) + assert.Equal(t, "", persisted, "probe timeout must not persist plaintext fallback") + }) + } +} + func TestWrapForOAuthArgument(t *testing.T) { const ( host = "https://example.com" diff --git a/libs/auth/storage/keyring.go b/libs/auth/storage/keyring.go index 4f00b88152..8ce87525b3 100644 --- a/libs/auth/storage/keyring.go +++ b/libs/auth/storage/keyring.go @@ -88,14 +88,10 @@ func NewKeyringCache() cache.TokenCache { } } -// ProbeKeyring returns nil if the OS keyring is reachable and accepts a -// write+delete cycle within the standard timeout. A non-nil error means the -// keyring cannot be used in this environment (no backend, headless Linux -// session waiting on a UI prompt, locked keychain refusing access, etc.). -// -// Used by databricks auth login to decide whether to silently fall back to -// plaintext storage before opening the browser, so the user does not -// complete an OAuth flow only to fail at the final Store call. +// ProbeKeyring returns nil if the OS keyring accepted a write+delete +// cycle within the standard timeout. *TimeoutError means the keyring +// was unresponsive (locked or hung, indistinguishable here); other +// errors are definitive failures. func ProbeKeyring() error { return probeWithBackend(zalandoBackend{}, defaultKeyringTimeout) }