def hf_clm_train(
    model_name:str='', 
    dataset_name:str="",
    context_length:int=128, 
    data:list=[],
    num_epochs:int=3, 
    batch_size:int=8, 
    fp16:bool=False, 
    bf16:bool=False,
    lr:float=5e-5, 
    from_hf:bool=True, 
    do_split:bool=True, 
    split_ratio:float=0.2,
    gradient_accumulation_steps:int=4, 
    gradient_checkpointing:bool=False,
    report_to:str='none', 
    wandb_api_key:str='', 
    wandb_config:wandbConfig=None,
    use_peft:bool=False, 
    peft_config=None, 
    hf_token:str='',
    hf_column:str='text', 
    lr_scheduler_type:str='linear', 
    eval_accumulation_steps:int=8,
    output_dir:str='clm_output', 
    ddp:bool=False, 
    zero:bool=True
):
model_name
string
required

The name or path of the pre-trained model to use. Note: although the signature defaults this to an empty string, a valid model name or path must be supplied — an empty value will not load a model.

dataset_name
string

The name of the dataset to be used for training. Defaults to an empty string.

context_length
int

The maximum length of the input sequences. Defaults to 128.

data
list

A list of strings to use as training data when from_hf is False. Defaults to an empty list. NOTE(review): the signature uses a mutable default ([]); pass a fresh list rather than relying on the default.

num_epochs
int

The number of training epochs. Defaults to 3.

batch_size
int

The batch size for training. Defaults to 8.

fp16
boolean

A flag to enable 16-bit floating-point (FP16) training. Defaults to False.

bf16
boolean

A flag to enable 16-bit Brain Floating Point (BF16) training. Defaults to False.

lr
float

The learning rate for optimization. Defaults to 5e-5.

from_hf
boolean

A flag to determine whether to load the dataset from Hugging Face. Defaults to True.

do_split
boolean

A flag to determine whether to split the dataset into training and validation sets. Defaults to True.

split_ratio
float

The ratio of the dataset to be used for validation. Defaults to 0.2.

gradient_accumulation_steps
int

The number of steps for gradient accumulation. Defaults to 4.

gradient_checkpointing
boolean

A flag to enable gradient checkpointing for reducing memory usage. Defaults to False.

report_to
string

The service to report training logs to (e.g., wandb). Defaults to 'none'.

wandb_api_key
string

The API key for Weights and Biases (WandB) logging. Defaults to an empty string.

wandb_config
wandbConfig

The configuration for Weights and Biases (WandB) logging. Defaults to None.

use_peft
boolean

A flag to enable Parameter-Efficient Fine-Tuning (PEFT). Defaults to False.

peft_config
object

The configuration object for PEFT. Defaults to None.

hf_token
string

The Hugging Face token required for accessing private datasets or models. Defaults to an empty string.

hf_column
string

The name of the column in the dataset to use for training. Defaults to 'text'.

lr_scheduler_type
string

The type of learning rate scheduler to use. Defaults to 'linear'.

eval_accumulation_steps
int

The number of evaluation steps whose prediction outputs are accumulated before being moved off the accelerator (reduces peak device memory during evaluation, per the Hugging Face Trainer convention). Defaults to 8.

output_dir
string

The directory to save the output model and logs. Defaults to 'clm_output'.

ddp
boolean

A flag to enable Distributed Data Parallel (DDP) training. Defaults to False.

zero
boolean

A flag to enable ZeRO (Zero Redundancy Optimizer) for memory optimization. Defaults to True.