HomeGuidesRecipesChangelog
Log In
Guides

Comprehensive Configuration Example

⚠️

Check This First!

This article refers to BaseModel accessed via a Docker container. If you are using BaseModel as a Snowflake GUI application, please refer to the Snowflake Native App section instead.

In this article, we show an extensive configuration file that uses a Snowflake database and illustrates the usage of all data and training parameters. For descriptions of the available settings, see the sections Data Model and Sources and Configuring Parameters.


⚠️

Note

The following configuration file demonstrates example usage of the parameters. The default values are already optimized, so setting custom values is not recommended unless you have a particular need.

data_sources:
  # Source "customers" (type: main_entity_attribute), keyed by customer_id and
  # read from the Snowflake table HM_KAGGLE.PRIVATE.CUSTOMERS.
  - type: main_entity_attribute
    main_entity_column: customer_id
    name: customers
    data_location:
      database_type: snowflake
      # Credentials and warehouse are ${...} placeholders, not literal values.
      # NOTE(review): unlike the other two sources in this list, no `role` is
      # set here — confirm that this is intentional.
      connection_params:
        user: ${SNOWFLAKE_USER}
        password: ${SNOWFLAKE_PASSWORD}
        account: ${SNOWFLAKE_ACCOUNT}
        warehouse: ${SNOWFLAKE_WAREHOUSE}
        database: HM_KAGGLE
        db_schema: PRIVATE
      table_name: CUSTOMERS
    # Presumably restricts the rows read to those with "Active" = 1 — confirm.
    where_condition: '"Active" = 1'
  # Source "transactions" (type: event), keyed by customer_id, with t_dat as
  # its date column; read from HM_KAGGLE.PRIVATE.TRANSACTIONS_NO_ARTICLES.
  - type: event
    main_entity_column: customer_id
    name: transactions
    date_column:
      name: t_dat
      format: 'yyyy-mm-dd'
    data_location:
      database_type: snowflake
      # Credentials, role and warehouse are ${...} placeholders.
      connection_params:
        user: ${SNOWFLAKE_USER}
        password: ${SNOWFLAKE_PASSWORD}
        account: ${SNOWFLAKE_ACCOUNT}
        role: ${SNOWFLAKE_ROLE}
        warehouse: ${SNOWFLAKE_WAREHOUSE}
        database: HM_KAGGLE
        db_schema: PRIVATE
      table_name: TRANSACTIONS_NO_ARTICLES
    partition_column: customer_id
    partition_values_transformation: hash_mod
    # The raw `price` column is disallowed; `price_float` below is defined as
    # a FLOAT cast of it.
    disallowed_columns:
      - price
    sql_lambdas:
      - alias: price_float
        expression: CAST({{resolve_fn("price")}} AS FLOAT)
    shared_entities:
      - name: product
        # One entry per column; the nested entries pair a column name with a
        # list naming the `articles` source.
        columns:
          - article_id
          - [prod_name, [articles]]
          - [detail_desc, [articles]]
        id_column: article_id
    joined_data_sources:
      # `articles` is the attribute source declared later in this list.
      - name: articles
        join_on:
          - [article_id, article_id]
        in_memory: false
    num_groups: 3
  # Source "articles" (type: attribute), read from HM_KAGGLE.PRIVATE.ARTICLES.
  # It declares no main_entity_column; the transactions source refers to it by
  # name in its joined_data_sources.
  - type: attribute
    name: articles
    data_location:
      database_type: snowflake
      # Credentials, role and warehouse are ${...} placeholders.
      connection_params:
        user: ${SNOWFLAKE_USER}
        password: ${SNOWFLAKE_PASSWORD}
        account: ${SNOWFLAKE_ACCOUNT}
        role: ${SNOWFLAKE_ROLE}
        warehouse: ${SNOWFLAKE_WAREHOUSE}
        database: HM_KAGGLE
        db_schema: PRIVATE
      table_name: ARTICLES
    # Only these four columns are listed as allowed.
    allowed_columns:
      - article_id
      - prod_name
      - detail_desc
      - department_name
    # Sets the column type of prod_name and detail_desc to `text`.
    column_type_overrides:
      prod_name: text
      detail_desc: text


data_params:
  data_start_date: 2018-09-20 00:00:00
  # Entity-based split; the training and validation values sum to 100.
  split:
    type: entity
    training: 95
    validation: 5
  # Alternative time-based split. To use it, remove the `split` block above
  # and strip the leading "# " from each line below.
  # split:
  #   type: time
  #   training:
  #     start_date: 2018-09-20 00:00:00
  #     end_date: 2020-09-01 00:00:00
  #   validation:
  #     start_date: 2020-09-07 00:00:00
  #     end_date: 2020-09-14 00:00:00
  #   test:
  #     start_date: 2020-09-14 00:00:00
  #     end_date: 2020-09-21 00:00:00
  maximum_splitpoints_per_entity: 20
  # Both settings below refer to the `transactions` event source by name.
  split_point_data_sources: [transactions]
  split_point_inclusion_overrides: {transactions: future}
  max_data_splits_per_split_point: 200
  ignore_entities_without_events: true
  dynamic_events_sampling: true
  apply_event_count_weighting: false
  apply_recency_based_weighting: false
  limit_entity_num_events: null
  # 500 million. Written without `_` digit separators because only YAML 1.1
  # parsers read `500_000_000` as an integer; YAML 1.2 parsers yield a string.
  window_shuffling_buffer_size: 500000000
  cache_path: "/path/to/store/cache"

# Key names match the arguments of torch.utils.data.DataLoader — presumably
# they are forwarded to it; confirm against the parameter reference.
data_loader_params:
  batch_size: 256
  num_workers: 10
  prefetch_factor: 2
  drop_last: false
  pin_memory: false
  pin_memory_device: ''
  

training_params:
  epochs: 1
  learning_rate: 0.0003
  devices: 1
  accelerator: 'gpu'
  strategy: null
  precision: bf16-mixed
  # Use the two batch limits below only for debugging.
  limit_train_batches: 10
  limit_val_batches: 10
  gradient_clip_val: null
  warm_start_steps: 0
  early_stopping:
    min_delta: 0.001
    patience: 3
    verbose: false
  # Entity ids come from the SQL subquery below; a file path is the
  # alternative way to supply them.
  entity_ids:
    subquery: 'SELECT DISTINCT("customer_id") FROM CUSTOMERS WHERE "age" > 18'
    # file: path_to_file  # alternative to the subquery
    matching: true
  
  

# NOTE(review): judging by the section name, these three values bound the
# model's memory use — confirm against the parameter reference.
memory_constraining_params:
  hidden_dim: 2048
  num_layers: 4
  emde_quality: 1

# Settings grouped under query_optimization: chunk, concurrency and CPU
# counts, plus sampling parameters.
query_optimization:
  num_query_chunks: 1
  num_concurrent_features: 4
  num_cpus: 4
  # Both sampling parameters are null here, i.e. no value is set.
  # Presumably null means "no limit" — confirm.
  sampling_params:
    num_entities: null
    history_limit: null