Manage Databricks Workspaces
This guide explains how to manage Azure Databricks workspaces using Terraform as of 2025, following the latest best practices.
Provider Configuration
The following configuration blocks set up the provider and initialize the most commonly used data sources: databricks_spark_version, databricks_node_type, and databricks_current_user.
terraform {
  required_providers {
    databricks = {
      source  = "databricks/databricks"
      version = "~> 1.41.0" # Always specify version constraints
    }

    # This guide also declares azurerm_* resources and a data "http" source,
    # so those providers are pinned explicitly instead of being resolved
    # implicitly at whatever version happens to be latest.
    azurerm = {
      source  = "hashicorp/azurerm"
      version = ">= 3.39.0, < 4.0.0"
    }
    http = {
      source  = "hashicorp/http"
      version = "~> 3.0"
    }
  }

  # Best practice: Define backend for state management.
  # The block is intentionally empty (partial configuration): supply the
  # storage account details with `-backend-config=backend.tfvars` or
  # environment variables at `terraform init` time.
  backend "azurerm" {
  }
}
provider "databricks" {
  # Authentication can be supplied through environment variables
  # or through one of the options below.

  # Option 1: Azure managed identity (MSI) - for Terraform running on an
  # Azure resource that has a managed identity assigned
  # azure_use_msi = true

  # Option 2: Azure service principal authenticated with a client secret.
  # NOTE(review): this is not workload identity federation, which would not
  # use a client secret - confirm which mechanism is intended for production.
  # azure_client_id = var.client_id
  # azure_tenant_id = var.tenant_id
  # azure_client_secret = var.client_secret
}
# Identity behind the configured credentials; its attributes are used
# throughout this guide to build per-user resource names and paths.
data "databricks_current_user" "me" {}

# Resolve a long-term-support runtime (recommended for production).
data "databricks_spark_version" "latest_lts" {
  long_term_support = true
}

# Resolve a node type that satisfies the minimum requirements below.
data "databricks_node_type" "smallest" {
  category      = "General Purpose"
  min_memory_gb = 8 # minimum performance criterion
  local_disk    = true
}
Standard Functionality
These resources do not require administrative privileges. More documentation is available at the dedicated pages databricks_secret_scope, databricks_token, databricks_secret, databricks_notebook, databricks_job, databricks_cluster, databricks_cluster_policy, databricks_instance_pool.
# Create a secret scope with proper naming convention
resource "databricks_secret_scope" "this" {
  name = "demo-${data.databricks_current_user.me.alphanumeric}"

  # Best practice: use an Azure Key Vault-backed secret scope for production.
  # The provider documents this as a `keyvault_metadata` block.
  # NOTE(review): secrets in a Key Vault-backed scope are presumably managed
  # in Key Vault itself rather than written through databricks_secret -
  # confirm before enabling this alongside the databricks_secret below.
  # keyvault_metadata {
  #   resource_id = azurerm_key_vault.example.id
  #   dns_name    = azurerm_key_vault.example.vault_uri
  # }
}
# Personal access token with a bounded lifetime and a comment that records
# which environment it was issued for. Rotate it regularly.
resource "databricks_token" "pat" {
  lifetime_seconds = 2592000 # 30 * 24 * 3600 seconds = 30 days
  comment          = "Created by Terraform for ${var.environment} environment"
}
# Store the generated token value in the secret scope under the key "token"
# so notebooks can read it through dbutils.secrets.
resource "databricks_secret" "token" {
  scope        = databricks_secret_scope.this.name
  key          = "token"
  string_value = databricks_token.pat.token_value
}
# Use source control for notebooks in production
resource "databricks_notebook" "this" {
  path     = "${data.databricks_current_user.me.home}/Terraform"
  language = "PYTHON"

  # `<<-EOT` strips the common leading indentation, so the Python source keeps
  # only its own relative indentation (required for the try/except body).
  content_base64 = base64encode(<<-EOT
    # Best practice: Use secrets for sensitive information
    token = dbutils.secrets.get('${databricks_secret_scope.this.name}', '${databricks_secret.token.key}')
    print(f'This should be redacted: {token}')
    # Add proper error handling in production notebooks
    try:
        # Your code here
        print("Processing data...")
    except Exception as e:
        print(f"Error: {e}")
        raise
    EOT
  )

  # Note: databricks_notebook has no tagging/metadata argument (the previous
  # `metadata_base64` attribute is not part of the resource schema and fails
  # validation). Organize notebooks through their workspace path instead.
}
# Best practice: Use job clusters instead of existing clusters for jobs
resource "databricks_job" "this" {
  name = "Terraform Demo (${data.databricks_current_user.me.alphanumeric})"

  job_cluster {
    job_cluster_key = "main-cluster"

    new_cluster {
      num_workers   = 1 # Use autoscaling for production workloads
      spark_version = data.databricks_spark_version.latest_lts.id
      node_type_id  = data.databricks_node_type.smallest.id

      # Enable Photon for better performance. Photon is selected through
      # `runtime_engine`; clusters have no `photon_enabled` argument.
      # The chosen node type must be Photon-capable.
      runtime_engine = "PHOTON"

      # Best practice: Apply consistent tags
      custom_tags = {
        "Environment" = var.environment
        "ManagedBy"   = "Terraform"
        "Department"  = var.department
        "CostCenter"  = var.cost_center
      }

      spark_conf = {
        # Enable Delta optimizations
        "spark.databricks.delta.optimizeWrite.enabled" = "true"
        "spark.databricks.delta.autoCompact.enabled"   = "true"
      }
    }
  }

  task {
    task_key        = "main"
    job_cluster_key = "main-cluster"

    # Retry policy is a per-task setting in the multi-task job format.
    max_retries      = 2
    retry_on_timeout = true

    notebook_task {
      notebook_path = databricks_notebook.this.path
      base_parameters = {
        "environment" = var.environment
      }
    }

    email_notifications {
      on_success = [data.databricks_current_user.me.user_name]
      on_failure = [data.databricks_current_user.me.user_name]
    }
  }

  # Schedule with proper timezone configuration
  schedule {
    quartz_cron_expression = "0 0 10 ? * MON-FRI" # Weekdays at 10:00 AM
    timezone_id            = "UTC"
  }

  # Set expectations and timeout for mission-critical jobs
  timeout_seconds = 3600 # 1 hour timeout

  # Best practice: Git source integration for CI/CD.
  # Left disabled here because the task above runs a notebook from the
  # workspace. When git_source is set, notebook tasks are resolved inside the
  # repository, so `notebook_path` must then be a path relative to the repo
  # root rather than a workspace path.
  # git_source {
  #   url      = "https://github.com/your-organization/your-repo"
  #   provider = "gitHub"
  #   branch   = "main"
  # }
}
# Best practice: Use cluster policies to enforce standards
resource "databricks_cluster" "this" {
  cluster_name            = "Exploration (${data.databricks_current_user.me.alphanumeric})"
  spark_version           = data.databricks_spark_version.latest_lts.id
  instance_pool_id        = databricks_instance_pool.smallest_nodes.id
  policy_id               = databricks_cluster_policy.this.id
  autotermination_minutes = 20

  # Best practice: Enable autoscaling for cost efficiency
  autoscale {
    min_workers = 1
    max_workers = 10
  }

  # Enable Photon for better query performance. Photon is selected through
  # `runtime_engine`; clusters have no `photon_enabled` argument.
  # The pool's node type must be Photon-capable.
  runtime_engine = "PHOTON"

  # Use init scripts for consistent configuration.
  # Init scripts stored on DBFS are end-of-life, so the script is referenced
  # as a workspace file instead; upload it to this path before applying.
  init_scripts {
    workspace {
      destination = "/Shared/init-scripts/init-cluster.sh"
    }
  }

  custom_tags = {
    "Environment" = var.environment
    "ManagedBy"   = "Terraform"
    "Department"  = var.department
    "CostCenter"  = var.cost_center
  }
}
# Governance and cost control: a policy that caps DBU consumption, pins
# auto-termination, restricts the runtime to the resolved LTS version and
# forces clusters onto the shared instance pool.
resource "databricks_cluster_policy" "this" {
  name = "Minimal (${data.databricks_current_user.me.alphanumeric})"

  definition = jsonencode({
    dbus_per_hour = {
      type     = "range"
      maxValue = 10
    }
    autotermination_minutes = {
      type   = "fixed"
      value  = 20
      hidden = true
    }
    spark_version = {
      type         = "allowlist"
      values       = [data.databricks_spark_version.latest_lts.id]
      defaultValue = data.databricks_spark_version.latest_lts.id
    }
    instance_pool_id = {
      type   = "fixed"
      value  = databricks_instance_pool.smallest_nodes.id
      hidden = false
    }
  })
}
# Best practice: Use instance pools for faster cluster startup times
resource "databricks_instance_pool" "smallest_nodes" {
  instance_pool_name = "Smallest Nodes (${data.databricks_current_user.me.alphanumeric})"
  min_idle_instances = 0
  max_capacity       = 30
  node_type_id       = data.databricks_node_type.smallest.id

  preloaded_spark_versions = [
    data.databricks_spark_version.latest_lts.id
  ]

  # Reduce idle time costs
  idle_instance_autotermination_minutes = 20

  # Apply consistent tags
  custom_tags = {
    "Environment" = var.environment
    "ManagedBy"   = "Terraform"
  }

  # Azure-specific availability (spot vs. on-demand) settings
  azure_attributes {
    availability = "SPOT_AZURE" # Use Spot instances for cost savings

    # On Azure this is a maximum bid PRICE, not a percentage (the percentage
    # form is the AWS `spot_bid_price_percent`). -1 means the instance is
    # never evicted on the basis of price.
    spot_bid_max_price = -1
  }
}
output "notebook_url" {
  description = "URL of the demo notebook created by this configuration."
  value       = databricks_notebook.this.url
}

output "job_url" {
  description = "URL of the demo job created by this configuration."
  value       = databricks_job.this.url
}
Workspace Security
Managing security requires administrative privileges. More documentation is available at the dedicated pages databricks_secret_acl, databricks_group, databricks_user, databricks_group_member, databricks_permissions.
# Permissions in this guide are granted to this group rather than to
# individual users.
resource "databricks_group" "spectators" {
  display_name = "Spectators (by ${data.databricks_current_user.me.alphanumeric})"

  # In production, optionally link the group to its Azure AD counterpart:
  # external_id = "AAD-GROUP-ID"
}
# Least privilege: the spectators group only gets READ on the secret scope.
resource "databricks_secret_acl" "spectators" {
  scope      = databricks_secret_scope.this.name
  principal  = databricks_group.spectators.display_name
  permission = "READ"
}
# Manage users systematically
resource "databricks_user" "dummy" {
  user_name    = "dummy+${data.databricks_current_user.me.alphanumeric}@example.com"
  display_name = "Dummy ${data.databricks_current_user.me.alphanumeric}"

  # For production, sync with the identity provider:
  # external_id = "AAD-USER-ID"

  # Optional: per the provider documentation, this deactivates the user
  # instead of deleting it when the resource is destroyed. It does not set
  # an expiration.
  # disable_as_user_deletion = true
}
# Membership: put the dummy user into the spectators group so it inherits
# the group's permissions instead of receiving direct grants.
resource "databricks_group_member" "a" {
  member_id = databricks_user.dummy.id
  group_id  = databricks_group.spectators.id
}
# Notebook access: the dummy user may run it, the spectators group may only
# read it.
resource "databricks_permissions" "notebook" {
  notebook_path = databricks_notebook.this.id

  # Group-level access
  access_control {
    permission_level = "CAN_READ"
    group_name       = databricks_group.spectators.display_name
  }

  # User-level access
  access_control {
    permission_level = "CAN_RUN"
    user_name        = databricks_user.dummy.user_name
  }
}
# Job access: ownership is limited to a single user; the spectators group
# may manage runs.
resource "databricks_permissions" "job" {
  job_id = databricks_job.this.id

  access_control {
    permission_level = "CAN_MANAGE_RUN"
    group_name       = databricks_group.spectators.display_name
  }

  access_control {
    permission_level = "IS_OWNER"
    user_name        = databricks_user.dummy.user_name
  }
}
# Cluster access: the dummy user may restart the cluster, the spectators
# group may attach to it.
resource "databricks_permissions" "cluster" {
  cluster_id = databricks_cluster.this.id

  access_control {
    permission_level = "CAN_ATTACH_TO"
    group_name       = databricks_group.spectators.display_name
  }

  access_control {
    permission_level = "CAN_RESTART"
    user_name        = databricks_user.dummy.user_name
  }
}
# Cluster policy access: the spectators group may use the policy.
resource "databricks_permissions" "policy" {
  cluster_policy_id = databricks_cluster_policy.this.id

  access_control {
    permission_level = "CAN_USE"
    group_name       = databricks_group.spectators.display_name
  }
}
# Instance pool access: the spectators group may attach clusters to the pool.
resource "databricks_permissions" "pool" {
  instance_pool_id = databricks_instance_pool.smallest_nodes.id

  access_control {
    permission_level = "CAN_ATTACH_TO"
    group_name       = databricks_group.spectators.display_name
  }
}
# Unity Catalog: metastore backed by an ADLS Gen2 (abfss://) location.
resource "databricks_metastore" "this" {
  storage_root = "abfss://container@accountname.dfs.core.windows.net/metastore"
  name         = "primary-metastore"

  # Optional Delta Sharing scope:
  # delta_sharing_scope = "INTERNAL_AND_EXTERNAL"
}
# Unity Catalog catalog for development and testing, attached to the
# metastore defined in this configuration.
resource "databricks_catalog" "sandbox" {
  name         = "sandbox"
  comment      = "Sandbox catalog for development and testing"
  metastore_id = databricks_metastore.this.id

  properties = {
    purpose = "development"
  }
}
# Fine-grained Unity Catalog privileges on the sandbox catalog for the
# spectators group.
resource "databricks_grants" "sandbox" {
  catalog = databricks_catalog.sandbox.name

  grant {
    privileges = ["USE_CATALOG", "USE_SCHEMA"]
    principal  = databricks_group.spectators.display_name
  }
}
Advanced Configuration
Use these configurations for network security and advanced workspace settings.
# Fetch the public IP address of the machine running Terraform; it feeds the
# IP access list configuration.
data "http" "my_ip" {
  url = "https://ifconfig.me"
}
# Enable IP access lists and tighten workspace security settings.
# Keep all workspace settings in a single databricks_workspace_conf resource:
# multiple instances manage the same underlying configuration and conflict.
resource "databricks_workspace_conf" "this" {
  custom_config = {
    "enableIpAccessLists" = "true"

    # Token management: allow configuration and cap token lifetime
    "enableTokensConfig"   = "true"
    "maxTokenLifetimeDays" = "30"

    # Disable the DBFS file browser UI for security. This is the
    # `enableDbfsFileBrowser` key; `enableWorkspaceFilesystem` controls
    # workspace files, not the DBFS browser.
    "enableDbfsFileBrowser" = "false"
  }

  # Unity Catalog is not a workspace-conf switch: a workspace is enabled for
  # Unity Catalog by assigning a metastore to it
  # (databricks_metastore_assignment).
}
# Create IP access list for restricted access.
# `response_body` replaces the deprecated `body` attribute of the http data
# source; chomp() drops any trailing newline so the CIDR stays well-formed.
resource "databricks_ip_access_list" "only_me" {
  label        = "only ${chomp(data.http.my_ip.response_body)} is allowed to access workspace"
  list_type    = "ALLOW"
  ip_addresses = ["${chomp(data.http.my_ip.response_body)}/32"]

  # IP access lists must be enabled on the workspace before a list is created.
  depends_on = [databricks_workspace_conf.this]
}
# Configure token usage permissions for the service principal used for CI/CD.
# The Databricks provider has no `databricks_token_usage` resource; token
# usage is granted through databricks_permissions with
# `authorization = "tokens"`.
#
# Only one such resource may exist per workspace, and it is authoritative:
# principals not listed here lose permission to use tokens.
resource "databricks_permissions" "token_usage" {
  authorization = "tokens"

  access_control {
    service_principal_name = var.service_principal_app_id
    permission_level       = "CAN_USE"
  }
}
# Configure Private Link for secure connectivity.
# databricks_mws_private_access_settings is an account-level resource for
# AWS PrivateLink / GCP Private Service Connect and does not apply to Azure.
# On Azure, Private Link is a private endpoint on the workspace resource.
# To turn off public access, set `public_network_access_enabled = false` on
# the azurerm_databricks_workspace that owns the workspace.
resource "azurerm_private_endpoint" "databricks" {
  name                = "databricks-private-link"
  resource_group_name = var.resource_group_name
  location            = var.location
  subnet_id           = var.private_endpoint_subnet_id

  private_service_connection {
    name                           = "databricks-ui-api"
    private_connection_resource_id = var.databricks_workspace_id
    is_manual_connection           = false
    subresource_names              = ["databricks_ui_api"]
  }
}
# Store the PAT in Azure Key Vault with an expiry that matches the token's
# 30-day lifetime. This does not rotate anything by itself: rotation happens
# when databricks_token.pat is replaced and the new value is written here.
resource "azurerm_key_vault_secret" "pat" {
  name         = "databricks-pat"
  value        = databricks_token.pat.token_value
  key_vault_id = azurerm_key_vault.example.id

  # Set expiration time
  expiration_date = timeadd(timestamp(), "720h") # 30 days

  # timestamp() yields a new value on every run, so without this the secret
  # would show a change on every plan. The expiry is set once at creation.
  lifecycle {
    ignore_changes = [expiration_date]
  }
}
# Configure audit logs export to Azure Log Analytics.
# No databricks_workspace_conf entry is needed for this: "enableAuditLogs" is
# not a workspace configuration key, and a second databricks_workspace_conf
# resource would conflict with the one that manages the other workspace
# settings. On Azure, audit (diagnostic) logs are exported with an Azure
# Monitor diagnostic setting on the workspace resource, targeting the Log
# Analytics workspace.
# Log Analytics workspace that receives the Databricks diagnostic logs;
# data is retained for 30 days.
resource "azurerm_log_analytics_workspace" "this" {
  name                = "databricks-logs-${var.environment}"
  location            = var.location
  resource_group_name = var.resource_group_name
  retention_in_days   = 30
  sku                 = "PerGB2018"
}
# Export logs using Azure Diagnostic Settings.
# Azure Databricks has no single "audit" category; logs are emitted per
# service (accounts, clusters, jobs, ...), so each category is enabled
# explicitly. Extend the list as needed.
resource "azurerm_monitor_diagnostic_setting" "this" {
  name                       = "databricks-diagnostics"
  target_resource_id         = var.databricks_workspace_id
  log_analytics_workspace_id = azurerm_log_analytics_workspace.this.id

  dynamic "enabled_log" {
    for_each = toset([
      "accounts",
      "clusters",
      "jobs",
      "notebook",
      "secrets",
      "workspace",
    ])

    content {
      category = enabled_log.value
    }
  }

  # The nested `retention_policy` block is deprecated; retention is governed
  # by the destination Log Analytics workspace (`retention_in_days`).
}
Best Practices for Workspace Management (2025)
- Follow Least Privilege Principle: Grant minimal permissions required for each role
- Use Instance Pools: Reduce cluster start times and optimize costs
- Implement Cluster Policies: Enforce governance and cost controls
- Enable Unity Catalog: Manage data governance and security at the object level
- Leverage Azure DevOps or GitHub Actions: Automate Terraform deployments
- Use Service Principals: Avoid personal tokens for automation
- Implement Network Security: Configure IP access lists and Private Link
- Monitor with Log Analytics: Export audit logs for security monitoring
- Rotate Secrets Regularly: Set up automated token rotation
- Use Spot Instances: Reduce costs for non-production workloads
- Tag All Resources: Improve cost tracking and resource organization
- Implement GitOps: Source control all infrastructure and code
For production environments, consider deploying Databricks in an Azure landing zone with proper network isolation, automated CI/CD pipelines, and comprehensive monitoring.