# Pipeline config (protobuf text format) for the two-channel "ultra_hstu"
# model on the kuairand parquet data. One field per line; comments always sit
# on their own line so that "#" never swallows live configuration.

train_input_path: "data/test/kuairand-mot-1k-train-c4096-s100.parquet"
eval_input_path: "data/test/kuairand-mot-1k-eval-c4096-s100.parquet"
model_dir: "experiments/kuairand/ultra_hstu"

# Optimisation: separate optimizers for sparse and dense parameters, both at a
# constant learning rate; a single epoch with one checkpoint per epoch.
train_config {
    sparse_optimizer {
        rowwise_adagrad_optimizer {
            lr: 0.001
        }
        constant_learning_rate {
        }
    }
    dense_optimizer {
        adam_optimizer {
            lr: 0.001
        }
        constant_learning_rate {
        }
    }
    num_epochs: 1
    save_checkpoints_epochs: 1
    mixed_precision: "BF16"
}

# Input pipeline. The two label fields are the ones referenced by the
# task_configs in fusion_mtl_tower below.
data_config {
    batch_size: 8
    dataset_type: ParquetDataset
    num_workers: 2
    fg_mode: FG_DAG
    label_fields: ["cand_seq__action_weight", "cand_seq__watch_time"]
}

# Sequence "click_seq": feeds the "uih_click*" feature groups.
# All three sequences below use embedding_name "video_id_emb" for video_id.
feature_configs {
    sequence_feature {
        sequence_name: "click_seq"
        sequence_length: 8000
        sequence_delim: "|"
        features {
            id_feature {
                feature_name: "video_id"
                expression: "item:video_id"
                embedding_name: "video_id_emb"
                embedding_dim: 32
                num_buckets: 10000000
                data_type: "FP16"
            }
        }
        features {
            raw_feature {
                feature_name: "action_timestamp"
                expression: "user:action_timestamp"
            }
        }
        features {
            raw_feature {
                feature_name: "action_weight"
                expression: "user:action_weight"
            }
        }
        features {
            raw_feature {
                feature_name: "watch_time"
                expression: "user:watch_time"
            }
        }
    }
}

# Sequence "view_seq": same sub-features as click_seq; feeds "uih_view*".
feature_configs {
    sequence_feature {
        sequence_name: "view_seq"
        sequence_length: 8000
        sequence_delim: "|"
        features {
            id_feature {
                feature_name: "video_id"
                expression: "item:video_id"
                embedding_name: "video_id_emb"
                embedding_dim: 32
                num_buckets: 10000000
                data_type: "FP16"
            }
        }
        features {
            raw_feature {
                feature_name: "action_timestamp"
                expression: "user:action_timestamp"
            }
        }
        features {
            raw_feature {
                feature_name: "action_weight"
                expression: "user:action_weight"
            }
        }
        features {
            raw_feature {
                feature_name: "watch_time"
                expression: "user:watch_time"
            }
        }
    }
}

# Sequence "cand_seq": candidate items plus their query time. The label
# columns cand_seq__action_weight / cand_seq__watch_time are declared in
# data_config.label_fields, not as features here.
feature_configs {
    sequence_feature {
        sequence_name: "cand_seq"
        sequence_length: 8000
        sequence_delim: "|"
        features {
            id_feature {
                feature_name: "video_id"
                expression: "item:video_id"
                embedding_name: "video_id_emb"
                embedding_dim: 32
                num_buckets: 10000000
                data_type: "FP16"
            }
        }
        features {
            raw_feature {
                feature_name: "query_time"
                expression: "user:query_time"
            }
        }
    }
}

# User-side categorical features; all are listed in the "contextual" group.
feature_configs {
    id_feature {
        feature_name: "user_id"
        expression: "user:user_id"
        embedding_dim: 32
        num_buckets: 10000000
        data_type: "FP16"
    }
}
feature_configs {
    id_feature {
        feature_name: "user_active_degree"
        expression: "user:user_active_degree"
        embedding_dim: 32
        num_buckets: 8
        data_type: "FP16"
    }
}
feature_configs {
    id_feature {
        feature_name: "follow_user_num_range"
        expression: "user:follow_user_num_range"
        embedding_dim: 32
        num_buckets: 9
        data_type: "FP16"
    }
}
feature_configs {
    id_feature {
        feature_name: "fans_user_num_range"
        expression: "user:fans_user_num_range"
        embedding_dim: 32
        num_buckets: 9
        data_type: "FP16"
    }
}
feature_configs {
    id_feature {
        feature_name: "friend_user_num_range"
        expression: "user:friend_user_num_range"
        embedding_dim: 32
        num_buckets: 8
        data_type: "FP16"
    }
}
feature_configs {
    id_feature {
        feature_name: "register_days_range"
        expression: "user:register_days_range"
        embedding_dim: 32
        num_buckets: 8
        data_type: "FP16"
    }
}

model_config {
    feature_groups {
        group_name: "contextual"
        feature_names: "user_id"
        feature_names: "user_active_degree"
        feature_names: "follow_user_num_range"
        feature_names: "fans_user_num_range"
        feature_names: "friend_user_num_range"
        feature_names: "register_days_range"
        group_type: DEEP
    }

    # Channel "uih_click": click-style UIH stack. Reads the
    # action-weight & 1 sub-sequence derived from kuairand uih_seq
    # (see experiments/preprocess_kuairand_mot.py).
    feature_groups {
        group_name: "uih_click"
        feature_names: "click_seq__video_id"
        group_type: JAGGED_SEQUENCE
    }
    feature_groups {
        group_name: "uih_click_action"
        feature_names: "click_seq__action_weight"
        group_type: JAGGED_SEQUENCE
    }
    feature_groups {
        group_name: "uih_click_watchtime"
        feature_names: "click_seq__watch_time"
        group_type: JAGGED_SEQUENCE
    }
    feature_groups {
        group_name: "uih_click_timestamp"
        feature_names: "click_seq__action_timestamp"
        group_type: JAGGED_SEQUENCE
    }

    # Channel "uih_view": view-style UIH stack. Reads the
    # action-weight & 64 (long_view) sub-sequence. Both channels
    # share the video_id_emb embedding table.
    feature_groups {
        group_name: "uih_view"
        feature_names: "view_seq__video_id"
        group_type: JAGGED_SEQUENCE
    }
    feature_groups {
        group_name: "uih_view_action"
        feature_names: "view_seq__action_weight"
        group_type: JAGGED_SEQUENCE
    }
    feature_groups {
        group_name: "uih_view_watchtime"
        feature_names: "view_seq__watch_time"
        group_type: JAGGED_SEQUENCE
    }
    feature_groups {
        group_name: "uih_view_timestamp"
        feature_names: "view_seq__action_timestamp"
        group_type: JAGGED_SEQUENCE
    }

    # Shared candidate-side groups.
    feature_groups {
        group_name: "candidate"
        feature_names: "cand_seq__video_id"
        group_type: JAGGED_SEQUENCE
    }
    feature_groups {
        group_name: "candidate_timestamp"
        feature_names: "cand_seq__query_time"
        group_type: JAGGED_SEQUENCE
    }

    ultra_hstu {
        # One hstu block per channel. Each "name" equals the group_name of the
        # matching UIH feature group above. The two blocks are configured
        # identically apart from the name.
        hstu {
            name: "uih_click"
            stu {
                embedding_dim: 512
                num_heads: 4
                hidden_dim: 128
                attention_dim: 128
                output_dropout_ratio: 0.1
                use_group_norm: true
                sla_k1: 256
                sla_k2: 32
            }
            input_dropout_ratio: 0.2
            attn_num_layers: 4
            attn_truncation_split_layer: 2
            attn_truncation_tail_len: 512
            positional_encoder {
                num_position_buckets: 8192
                num_time_buckets: 2048
                use_time_encoding: true
            }
            input_preprocessor {
                contextual_preprocessor {
                    action_encoder {
                        simple_action_encoder {
                            action_embedding_dim: 8
                            # Same eight powers of two as the task_bitmask
                            # values in fusion_mtl_tower.
                            action_weights: [1, 2, 4, 8, 16, 32, 64, 128]
                        }
                    }
                    action_mlp {
                        simple_mlp {
                            hidden_dim: 256
                        }
                    }
                    content_encoder {
                        slice_content_encoder {}
                    }
                    content_mlp {
                        simple_mlp {
                            hidden_dim: 256
                        }
                    }
                }
            }
            output_postprocessor {
                timestamp_layernorm_postprocessor {
                    # NOTE(review): presumably seconds per hour / per day with
                    # 24 hours and 7 days per period - confirm against the
                    # postprocessor implementation.
                    time_duration_period_units: [3600, 86400]
                    time_duration_units_per_period: [24, 7]
                }
            }
        }
        hstu {
            name: "uih_view"
            stu {
                embedding_dim: 512
                num_heads: 4
                hidden_dim: 128
                attention_dim: 128
                output_dropout_ratio: 0.1
                use_group_norm: true
                sla_k1: 256
                sla_k2: 32
            }
            input_dropout_ratio: 0.2
            attn_num_layers: 4
            attn_truncation_split_layer: 2
            attn_truncation_tail_len: 512
            positional_encoder {
                num_position_buckets: 8192
                num_time_buckets: 2048
                use_time_encoding: true
            }
            input_preprocessor {
                contextual_preprocessor {
                    action_encoder {
                        simple_action_encoder {
                            action_embedding_dim: 8
                            # Same eight powers of two as the task_bitmask
                            # values in fusion_mtl_tower.
                            action_weights: [1, 2, 4, 8, 16, 32, 64, 128]
                        }
                    }
                    action_mlp {
                        simple_mlp {
                            hidden_dim: 256
                        }
                    }
                    content_encoder {
                        slice_content_encoder {}
                    }
                    content_mlp {
                        simple_mlp {
                            hidden_dim: 256
                        }
                    }
                }
            }
            output_postprocessor {
                timestamp_layernorm_postprocessor {
                    # NOTE(review): presumably seconds per hour / per day with
                    # 24 hours and 7 days per period - confirm against the
                    # postprocessor implementation.
                    time_duration_period_units: [3600, 86400]
                    time_duration_units_per_period: [24, 7]
                }
            }
        }

        # Multi-task tower. Eight tasks share the label
        # "cand_seq__action_weight", each with a distinct power-of-two
        # task_bitmask; "watchtime" uses "cand_seq__watch_time" and has no
        # bitmask.
        fusion_mtl_tower {
            mlp {
                hidden_units: 512
                activation: "nn.SiLU"
                use_ln: true
            }
            task_configs {
                task_name: "is_click"
                label_name: "cand_seq__action_weight"
                task_bitmask: 1
                losses {
                    binary_cross_entropy {}
                }
                metrics {
                    auc {}
                }
            }
            # NOTE(review): "is_like" is the only task using jrc_loss with
            # num_class: 2 and grouped_auc keyed on user_id; every other
            # bitmask task uses binary_cross_entropy + auc. Confirm that this
            # difference is intentional.
            task_configs {
                task_name: "is_like"
                label_name: "cand_seq__action_weight"
                task_bitmask: 2
                num_class: 2
                losses {
                    jrc_loss {
                        session_name: "user_id"
                    }
                }
                metrics {
                    grouped_auc {
                        grouping_key: "user_id"
                    }
                }
            }
            task_configs {
                task_name: "is_follow"
                label_name: "cand_seq__action_weight"
                task_bitmask: 4
                losses {
                    binary_cross_entropy {}
                }
                metrics {
                    auc {}
                }
            }
            task_configs {
                task_name: "is_comment"
                label_name: "cand_seq__action_weight"
                task_bitmask: 8
                losses {
                    binary_cross_entropy {}
                }
                metrics {
                    auc {}
                }
            }
            task_configs {
                task_name: "is_forward"
                label_name: "cand_seq__action_weight"
                task_bitmask: 16
                losses {
                    binary_cross_entropy {}
                }
                metrics {
                    auc {}
                }
            }
            task_configs {
                task_name: "is_hate"
                label_name: "cand_seq__action_weight"
                task_bitmask: 32
                losses {
                    binary_cross_entropy {}
                }
                metrics {
                    auc {}
                }
            }
            task_configs {
                task_name: "long_view"
                label_name: "cand_seq__action_weight"
                task_bitmask: 64
                losses {
                    binary_cross_entropy {}
                }
                metrics {
                    auc {}
                }
            }
            task_configs {
                task_name: "is_profile_enter"
                label_name: "cand_seq__action_weight"
                task_bitmask: 128
                losses {
                    binary_cross_entropy {}
                }
                metrics {
                    auc {}
                }
            }
            task_configs {
                task_name: "watchtime"
                label_name: "cand_seq__watch_time"
                losses {
                    l2_loss {}
                }
                metrics {
                    mean_absolute_error {}
                }
            }
        }
        # NOTE(review): the sequence features above declare
        # sequence_length: 8000 while max_seq_len here is 4096 - confirm the
        # shorter limit is intended.
        max_seq_len: 4096
    }
    kernel: CUTLASS
}