mirror of
https://github.com/AnxiousAnt/PatchGen.git
synced 2026-03-31 11:19:47 +02:00
1 line
9.8 KiB
Plaintext
{
 "metadata": {"kernelspec": {"language": "python", "display_name": "Python 3", "name": "python3"}, "language_info": {"pygments_lexer": "ipython3", "nbconvert_exporter": "python", "version": "3.6.4", "file_extension": ".py", "codemirror_mode": {"name": "ipython", "version": 3}, "name": "python", "mimetype": "text/x-python"}, "kaggle": {"accelerator": "none", "dataSources": [], "isInternetEnabled": true, "language": "python", "sourceType": "notebook", "isGpuEnabled": false}},
 "nbformat_minor": 4,
 "nbformat": 4,
 "cells": [
  {"cell_type": "code", "source": "%%capture\n# %pip (not !pip) so the installs target the running kernel's environment.\n%pip install pip3-autoremove\n!pip-autoremove torch torchvision torchaudio -y\n%pip install \"torch==2.4.0\" \"xformers==0.0.27.post2\" triton torchvision torchaudio\n%pip install \"unsloth[kaggle-new] @ git+https://github.com/unslothai/unsloth.git\"\n\nimport os\nos.environ[\"WANDB_DISABLED\"] = \"true\"  # disable Weights & Biases logging", "metadata": {"trusted": true}, "outputs": [], "execution_count": null},
  {"cell_type": "code", "source": "from unsloth import FastLanguageModel\nimport torch\n\nmax_seq_length = 2048  # context length used for both training stages\ndtype = None  # None = auto-detect\nload_in_4bit = True  # 4-bit quantization so the 7B model fits in memory\n\nmodel, tokenizer = FastLanguageModel.from_pretrained(\n    model_name = \"unsloth/mistral-7b-v0.3-bnb-4bit\",\n    max_seq_length = max_seq_length,\n    dtype = dtype,\n    load_in_4bit = load_in_4bit,\n)", "metadata": {"trusted": true}, "outputs": [], "execution_count": null},
  {"cell_type": "code", "source": "model = FastLanguageModel.get_peft_model(\n    model,\n    r = 128,\n    target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n                      \"gate_proj\", \"up_proj\", \"down_proj\",\n                      \"embed_tokens\", \"lm_head\",],  # embed_tokens/lm_head added for continual pretraining\n    lora_alpha = 32,\n    lora_dropout = 0,\n    bias = \"none\",\n    use_gradient_checkpointing = \"unsloth\",\n    random_state = 9984,\n    use_rslora = True,\n    loftq_config = None,\n)", "metadata": {"trusted": true}, "outputs": [], "execution_count": null},
  {"cell_type": "markdown", "source": "### Data Prep\n", "metadata": {}},
  {"cell_type": "code", "source": "format_prompt = \"\"\"\n{}\"\"\"\n\nEOS_TOKEN = tokenizer.eos_token  # must append EOS so the model learns to stop\n\n# NOTE: named distinctly from the instruction-tuning formatter defined later in\n# this notebook, so re-running cells out of order cannot silently apply the\n# wrong formatter to the wrong dataset.\ndef formatting_pretrain_func(examples):\n    \"\"\"Wrap each raw Pd patch in the plain pretraining prompt and append EOS.\"\"\"\n    outputs = []\n    for text in examples[\"Patch Contents\"]:\n        outputs.append(format_prompt.format(text) + EOS_TOKEN)\n    return { \"text\" : outputs, }", "metadata": {"trusted": true}, "outputs": [], "execution_count": null},
  {"cell_type": "code", "source": "from datasets import load_dataset\n\ndataset = load_dataset(\"ParZiVal04/Pd-patches-14k-dataset\", split = \"train\",)\n\ndataset = dataset.map(formatting_pretrain_func, batched = True,)", "metadata": {"trusted": true}, "outputs": [], "execution_count": null},
  {"cell_type": "code", "source": "for row in dataset[:5][\"Patch Contents\"]:\n    print(\"=========================\")\n    print(row)", "metadata": {"trusted": true}, "outputs": [], "execution_count": null},
  {"cell_type": "markdown", "source": "### Continued Pretraining", "metadata": {}},
  {"cell_type": "code", "source": "from transformers import TrainingArguments\nfrom unsloth import is_bfloat16_supported\nfrom unsloth import UnslothTrainer, UnslothTrainingArguments\n\ntrainer = UnslothTrainer(\n    model = model,\n    tokenizer = tokenizer,\n    train_dataset = dataset,\n    dataset_text_field = \"text\",\n    max_seq_length = max_seq_length,\n    dataset_num_proc = 2,\n\n    args = UnslothTrainingArguments(\n        per_device_train_batch_size = 2,\n        gradient_accumulation_steps = 8,  # effective batch size 16\n\n        warmup_ratio = 0.1,\n        num_train_epochs = 1,\n\n        learning_rate = 5e-5,\n        embedding_learning_rate = 1e-5,  # lower LR for embed_tokens/lm_head\n\n        fp16 = not is_bfloat16_supported(),\n        bf16 = is_bfloat16_supported(),\n        logging_steps = 1,\n        optim = \"adamw_8bit\",\n        weight_decay = 0.01,\n        lr_scheduler_type = \"linear\",\n        seed = 3407,\n        output_dir = \"outputs\",\n    ),\n)", "metadata": {"trusted": true}, "outputs": [], "execution_count": null},
  {"cell_type": "code", "source": "#@title Show current memory stats\ngpu_stats = torch.cuda.get_device_properties(0)\nstart_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\nmax_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)\nprint(f\"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.\")\nprint(f\"{start_gpu_memory} GB of memory reserved.\")", "metadata": {"trusted": true}, "outputs": [], "execution_count": null},
  {"cell_type": "code", "source": "trainer_stats = trainer.train()", "metadata": {"trusted": true}, "outputs": [], "execution_count": null},
  {"cell_type": "code", "source": "import os\n\n# Never hardcode credentials: read the Hugging Face token from the\n# environment (e.g. a Kaggle secret). Defaults to \"\" like the original.\nHF_TOKEN = os.environ.get(\"HF_TOKEN\", \"\")\n\ntry:\n    # Attempt to push model to the hub (\"hf/model\" is a placeholder repo id)\n    model.push_to_hub(\"hf/model\", token=HF_TOKEN)\n    print(\"Model pushed to the hub successfully.\")\n\n    # Attempt to push tokenizer to the hub\n    tokenizer.push_to_hub(\"hf/model\", token=HF_TOKEN)\n    print(\"Tokenizer pushed to the hub successfully.\")\n\nexcept Exception as e:\n    # Print the exception if something goes wrong\n    print(f\"An error occurred: {e}\")", "metadata": {"trusted": true}, "outputs": [], "execution_count": null},
  {"cell_type": "markdown", "source": "### Instruction Finetuning\n", "metadata": {}},
  {"cell_type": "code", "source": "alpaca_prompt = \"\"\"Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\n{}\n\n### Input:\n{}\n\n### Response:\n{}\"\"\"\n\nEOS_TOKEN = tokenizer.eos_token  # Must add EOS_TOKEN\n\n# Named distinctly from formatting_pretrain_func above to avoid shadowing.\ndef formatting_finetune_func(examples):\n    \"\"\"Format (instruction, prompt, output) triples into Alpaca-style text.\"\"\"\n    instructions = examples[\"instruction\"]\n    inputs = examples[\"prompt\"]\n    outputs = examples[\"output\"]\n    texts = []\n    # 'input_text' avoids shadowing the builtin input()\n    for instruction, input_text, output in zip(instructions, inputs, outputs):\n        texts.append(alpaca_prompt.format(instruction, input_text, output) + EOS_TOKEN)\n    return { \"text\" : texts }\n\n\nfrom datasets import load_dataset\ndataset = load_dataset(\"parzi-parzi/patch-gen-dataset-v0.8.7\", split = \"train\")\ndataset = dataset.map(formatting_finetune_func, batched = True,)", "metadata": {"trusted": true}, "outputs": [], "execution_count": null},
  {"cell_type": "code", "source": "print(dataset[0])", "metadata": {"trusted": true}, "outputs": [], "execution_count": null},
  {"cell_type": "code", "source": "from transformers import TrainingArguments\nfrom unsloth import is_bfloat16_supported\nfrom unsloth import UnslothTrainer, UnslothTrainingArguments\n\ntrainer = UnslothTrainer(\n    model = model,\n    tokenizer = tokenizer,\n    train_dataset = dataset,\n    dataset_text_field = \"text\",\n    max_seq_length = max_seq_length,\n    dataset_num_proc = 8,\n\n    args = UnslothTrainingArguments(\n        per_device_train_batch_size = 2,\n        gradient_accumulation_steps = 8,\n        warmup_ratio = 0.1,\n        num_train_epochs = 2,\n        learning_rate = 5e-5,\n        embedding_learning_rate = 1e-5,\n\n        fp16 = not is_bfloat16_supported(),\n        bf16 = is_bfloat16_supported(),\n        logging_steps = 1,\n        optim = \"adamw_8bit\",\n        weight_decay = 0.00,\n        lr_scheduler_type = \"linear\",\n        seed = 3407,\n        output_dir = \"outputs\",\n    ),\n)", "metadata": {"trusted": true}, "outputs": [], "execution_count": null},
  {"cell_type": "code", "source": "trainer_stats = trainer.train()", "metadata": {"trusted": true}, "outputs": [], "execution_count": null},
  {"cell_type": "code", "source": "import os\n\nHF_TOKEN = os.environ.get(\"HF_TOKEN\", \"\")  # do not hardcode credentials\nmodel.push_to_hub(\"hf/model\", token = HF_TOKEN)\ntokenizer.push_to_hub(\"hf/model\", token = HF_TOKEN)", "metadata": {"trusted": true}, "outputs": [], "execution_count": null},
  {"cell_type": "code", "source": "#@title Show final memory and time stats\nused_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\nused_memory_for_lora = round(used_memory - start_gpu_memory, 3)\nused_percentage = round(used_memory / max_memory * 100, 3)\nlora_percentage = round(used_memory_for_lora / max_memory * 100, 3)\nprint(f\"{trainer_stats.metrics['train_runtime']} seconds used for training.\")\nprint(f\"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.\")\nprint(f\"Peak reserved memory = {used_memory} GB.\")\nprint(f\"Peak reserved memory for training = {used_memory_for_lora} GB.\")\nprint(f\"Peak reserved memory % of max memory = {used_percentage} %.\")\nprint(f\"Peak reserved memory for training % of max memory = {lora_percentage} %.\")", "metadata": {"trusted": true}, "outputs": [], "execution_count": null},
  {"cell_type": "markdown", "source": "### Inference\n", "metadata": {}},
  {"cell_type": "code", "source": "FastLanguageModel.for_inference(model)\ninputs = tokenizer(\n[\n    alpaca_prompt.format(\n        \"create a Pd patch that matches the following request.\", # instruction\n        \"Create a Pure Data patch that takes an integer and shifts its binary representation to the right by a specified number of places.\", # input\n        \"\", # output - leave this blank for generation!\n    )\n], return_tensors = \"pt\").to(\"cuda\")\n\nfrom transformers import TextStreamer\ntext_streamer = TextStreamer(tokenizer)\n_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 512)", "metadata": {"trusted": true}, "outputs": [], "execution_count": null},
  {"cell_type": "markdown", "source": "### GGUF / llama.cpp Conversion", "metadata": {}},
  {"cell_type": "code", "source": "import os\nHF_TOKEN = os.environ.get(\"HF_TOKEN\", \"\")  # do not hardcode credentials\n\n# Save to 8bit Q8_0\nif False: model.save_pretrained_gguf(\"model\", tokenizer,)\nif False: model.push_to_hub_gguf(\"hf/model\", tokenizer, token = HF_TOKEN)\n\n# Save to 16bit GGUF\nif False: model.save_pretrained_gguf(\"model\", tokenizer, quantization_method = \"f16\")\nif False: model.push_to_hub_gguf(\"hf/model\", tokenizer, quantization_method = \"f16\", token = HF_TOKEN)\n\n# Save to q4_k_m GGUF\nif False: model.save_pretrained_gguf(\"model\", tokenizer, quantization_method = \"q4_k_m\")\nif False: model.push_to_hub_gguf(\"hf/model\", tokenizer, quantization_method = \"q4_k_m\", token = HF_TOKEN)\nif False: model.push_to_hub_gguf(\"hf/model\", tokenizer, quantization_method = \"q5_k_m\", token = HF_TOKEN)", "metadata": {"trusted": true}, "outputs": [], "execution_count": null}
 ]
}