forecasting
Creates, updates, deletes, gets or lists a forecasting resource.
Overview
| Name | forecasting |
| Type | Resource |
| Id | databricks_workspace.ml.forecasting |
Fields
The following fields are returned by SELECT queries:
- get
| Name | Datatype | Description |
|---|---|---|
| experiment_id | string | The unique ID for the forecasting experiment. |
| experiment_page_url | string | The URL to the forecasting experiment page. |
| state | string | The current state of the forecasting experiment. (CANCELLED, FAILED, PENDING, RUNNING, SUCCEEDED) |
Methods
The following methods are available for this resource:
| Name | Accessible by | Required Params | Optional Params | Description |
|---|---|---|---|---|
| get | select | experiment_id, deployment_name | | Public RPC to get forecasting experiment |
| create | insert | deployment_name, train_data_path, target_column, time_column, forecast_granularity, forecast_horizon | | Creates a serverless forecasting experiment. Returns the experiment ID. |
Parameters
Parameters can be passed in the WHERE clause of a query. Check the Methods section to see which parameters are required or optional for each operation.
| Name | Datatype | Description |
|---|---|---|
| deployment_name | string | The Databricks Workspace Deployment Name (default: dbc-abcd0123-a1bc) |
| experiment_id | string | The unique ID of a forecasting experiment |
SELECT examples
- get
Public RPC to get forecasting experiment
-- Look up a single forecasting experiment by its ID within a workspace deployment.
SELECT
  experiment_id,
  experiment_page_url,
  state
FROM databricks_workspace.ml.forecasting
WHERE experiment_id = '{{ experiment_id }}' -- required
  AND deployment_name = '{{ deployment_name }}' -- required
;
INSERT examples
- create
- Manifest
Creates a serverless forecasting experiment. Returns the experiment ID.
-- Create a serverless forecasting experiment.
-- Values tagged /* required */ must be supplied; the remaining values are optional.
-- The RETURNING clause reads back the new experiment's ID, page URL and state.
INSERT INTO databricks_workspace.ml.forecasting (
  train_data_path,
  target_column,
  time_column,
  forecast_granularity,
  forecast_horizon,
  custom_weights_column,
  experiment_path,
  future_feature_data_path,
  holiday_regions,
  include_features,
  max_runtime,
  prediction_data_path,
  primary_metric,
  register_to,
  split_column,
  timeseries_identifier_columns,
  training_frameworks,
  deployment_name
)
SELECT
  '{{ train_data_path }}' /* required */,
  '{{ target_column }}' /* required */,
  '{{ time_column }}' /* required */,
  '{{ forecast_granularity }}' /* required */,
  {{ forecast_horizon }} /* required */,
  '{{ custom_weights_column }}',
  '{{ experiment_path }}',
  '{{ future_feature_data_path }}',
  '{{ holiday_regions }}',
  '{{ include_features }}',
  {{ max_runtime }},
  '{{ prediction_data_path }}',
  '{{ primary_metric }}',
  '{{ register_to }}',
  '{{ split_column }}',
  '{{ timeseries_identifier_columns }}',
  '{{ training_frameworks }}',
  '{{ deployment_name }}' /* required */
RETURNING
  experiment_id,
  experiment_page_url,
  state
;
# Description fields are for documentation purposes
# Values written as {{ ... }} are template placeholders to be filled in.
- name: forecasting
  props:
    - name: deployment_name
      value: "{{ deployment_name }}"
      description: Required parameter for the forecasting resource.
    - name: train_data_path
      value: "{{ train_data_path }}"
      description: |
        The fully qualified path of a Unity Catalog table, formatted as catalog_name.schema_name.table_name, used as training data for the forecasting model.
    - name: target_column
      value: "{{ target_column }}"
      description: |
        The column in the input training table used as the prediction target for model training. The values in this column are used as the ground truth for model training.
    - name: time_column
      value: "{{ time_column }}"
      description: |
        The column in the input training table that represents each row's timestamp.
    - name: forecast_granularity
      value: "{{ forecast_granularity }}"
      description: |
        The time interval between consecutive rows in the time series data. Possible values include: '1 second', '1 minute', '5 minutes', '10 minutes', '15 minutes', '30 minutes', 'Hourly', 'Daily', 'Weekly', 'Monthly', 'Quarterly', 'Yearly'.
    - name: forecast_horizon
      value: {{ forecast_horizon }}
      description: |
        The number of time steps into the future to make predictions, calculated as a multiple of forecast_granularity. This value represents how far ahead the model should forecast.
    - name: custom_weights_column
      value: "{{ custom_weights_column }}"
      description: |
        The column in the training table used to customize weights for each time series.
    - name: experiment_path
      value: "{{ experiment_path }}"
      description: |
        The path in the workspace to store the created experiment.
    - name: future_feature_data_path
      value: "{{ future_feature_data_path }}"
      description: |
        The fully qualified path of a Unity Catalog table, formatted as catalog_name.schema_name.table_name, used to store future feature data for predictions.
    - name: holiday_regions
      value:
        - "{{ holiday_regions }}"
      description: |
        The region code(s) to automatically add holiday features. Currently supports only one region.
    - name: include_features
      value:
        - "{{ include_features }}"
      description: |
        Specifies the list of feature columns to include in model training. These columns must exist in the training data and be of type string, numerical, or boolean. If not specified, no additional features will be included. Note: Certain columns are automatically handled: - Automatically excluded: split_column, target_column, custom_weights_column. - Automatically included: time_column.
    - name: max_runtime
      value: {{ max_runtime }}
      description: |
        The maximum duration for the experiment in minutes. The experiment stops automatically if it exceeds this limit.
    - name: prediction_data_path
      value: "{{ prediction_data_path }}"
      description: |
        The fully qualified path of a Unity Catalog table, formatted as catalog_name.schema_name.table_name, used to store predictions.
    - name: primary_metric
      value: "{{ primary_metric }}"
      description: |
        The evaluation metric used to optimize the forecasting model.
    - name: register_to
      value: "{{ register_to }}"
      description: |
        The fully qualified path of a Unity Catalog model, formatted as catalog_name.schema_name.model_name, used to store the best model.
    - name: split_column
      value: "{{ split_column }}"
      description: |
        The column in the training table used for custom data splits. Values must be 'train', 'validate', or 'test'.
    - name: timeseries_identifier_columns
      value:
        - "{{ timeseries_identifier_columns }}"
      description: |
        The column in the training table used to group the dataset for predicting individual time series.
    - name: training_frameworks
      value:
        - "{{ training_frameworks }}"
      description: |
        List of frameworks to include for model tuning. Possible values are 'Prophet', 'ARIMA', 'DeepAR'. An empty list includes all supported frameworks.