mirror of
https://github.com/AnxiousAnt/PatchGen.git
synced 2026-03-31 11:19:47 +02:00
1 line
9.8 KiB
Plaintext
{
 "metadata": {"kernelspec": {"language": "python", "display_name": "Python 3", "name": "python3"}, "language_info": {"pygments_lexer": "ipython3", "nbconvert_exporter": "python", "version": "3.6.4", "file_extension": ".py", "codemirror_mode": {"name": "ipython", "version": 3}, "name": "python", "mimetype": "text/x-python"}, "kaggle": {"accelerator": "none", "dataSources": [], "isInternetEnabled": true, "language": "python", "sourceType": "notebook", "isGpuEnabled": false}},
 "nbformat_minor": 4,
 "nbformat": 4,
 "cells": [
  {"cell_type": "code", "source": "%%capture\n# %pip (not !pip) so the installs target the running kernel's environment.\n%pip install pip3-autoremove\n!pip-autoremove torch torchvision torchaudio -y\n%pip install \"torch==2.4.0\" \"xformers==0.0.27.post2\" triton torchvision torchaudio\n%pip install \"unsloth[kaggle-new] @ git+https://github.com/unslothai/unsloth.git\"\n\nimport os\nos.environ[\"WANDB_DISABLED\"] = \"true\"  # disable Weights & Biases logging", "metadata": {"trusted": true}, "outputs": [], "execution_count": null},
  {"cell_type": "code", "source": "from unsloth import FastLanguageModel\nimport torch\n\nmax_seq_length = 2048  # context length used for both training stages\ndtype = None  # None = auto-detect\nload_in_4bit = True  # 4-bit quantization so the 7B model fits in memory\n\nmodel, tokenizer = FastLanguageModel.from_pretrained(\n    model_name = \"unsloth/mistral-7b-v0.3-bnb-4bit\",\n    max_seq_length = max_seq_length,\n    dtype = dtype,\n    load_in_4bit = load_in_4bit,\n)", "metadata": {"trusted": true}, "outputs": [], "execution_count": null},
  {"cell_type": "code", "source": "model = FastLanguageModel.get_peft_model(\n    model,\n    r = 128,\n    target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n                      \"gate_proj\", \"up_proj\", \"down_proj\",\n                      \"embed_tokens\", \"lm_head\",],  # embed_tokens/lm_head added for continual pretraining\n    lora_alpha = 32,\n    lora_dropout = 0,\n    bias = \"none\",\n    use_gradient_checkpointing = \"unsloth\",\n    random_state = 9984,\n    use_rslora = True,\n    loftq_config = None,\n)", "metadata": {"trusted": true}, "outputs": [], "execution_count": null},
  {"cell_type": "markdown", "source": "### Data Prep\n", "metadata": {}},
  {"cell_type": "code", "source": "format_prompt = \"\"\"\n{}\"\"\"\n\nEOS_TOKEN = tokenizer.eos_token  # must append EOS so the model learns to stop\n\n# NOTE: named distinctly from the instruction-tuning formatter defined later in\n# this notebook, so re-running cells out of order cannot silently apply the\n# wrong formatter to the wrong dataset.\ndef formatting_pretrain_func(examples):\n    \"\"\"Wrap each raw Pd patch in the plain pretraining prompt and append EOS.\"\"\"\n    outputs = []\n    for text in examples[\"Patch Contents\"]:\n        outputs.append(format_prompt.format(text) + EOS_TOKEN)\n    return { \"text\" : outputs, }", "metadata": {"trusted": true}, "outputs": [], "execution_count": null},
  {"cell_type": "code", "source": "from datasets import load_dataset\n\ndataset = load_dataset(\"ParZiVal04/Pd-patches-14k-dataset\", split = \"train\",)\n\ndataset = dataset.map(formatting_pretrain_func, batched = True,)", "metadata": {"trusted": true}, "outputs": [], "execution_count": null},
  {"cell_type": "code", "source": "for row in dataset[:5][\"Patch Contents\"]:\n    print(\"=========================\")\n    print(row)", "metadata": {"trusted": true}, "outputs": [], "execution_count": null},
  {"cell_type": "markdown", "source": "### Continued Pretraining", "metadata": {}},
  {"cell_type": "code", "source": "from transformers import TrainingArguments\nfrom unsloth import is_bfloat16_supported\nfrom unsloth import UnslothTrainer, UnslothTrainingArguments\n\ntrainer = UnslothTrainer(\n    model = model,\n    tokenizer = tokenizer,\n    train_dataset = dataset,\n    dataset_text_field = \"text\",\n    max_seq_length = max_seq_length,\n    dataset_num_proc = 2,\n\n    args = UnslothTrainingArguments(\n        per_device_train_batch_size = 2,\n        gradient_accumulation_steps = 8,  # effective batch size 16\n\n        warmup_ratio = 0.1,\n        num_train_epochs = 1,\n\n        learning_rate = 5e-5,\n        embedding_learning_rate = 1e-5,  # lower LR for embed_tokens/lm_head\n\n        fp16 = not is_bfloat16_supported(),\n        bf16 = is_bfloat16_supported(),\n        logging_steps = 1,\n        optim = \"adamw_8bit\",\n        weight_decay = 0.01,\n        lr_scheduler_type = \"linear\",\n        seed = 3407,\n        output_dir = \"outputs\",\n    ),\n)", "metadata": {"trusted": true}, "outputs": [], "execution_count": null},
  {"cell_type": "code", "source": "#@title Show current memory stats\ngpu_stats = torch.cuda.get_device_properties(0)\nstart_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\nmax_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)\nprint(f\"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.\")\nprint(f\"{start_gpu_memory} GB of memory reserved.\")", "metadata": {"trusted": true}, "outputs": [], "execution_count": null},
  {"cell_type": "code", "source": "trainer_stats = trainer.train()", "metadata": {"trusted": true}, "outputs": [], "execution_count": null},
  {"cell_type": "code", "source": "import os\n\n# Never hardcode credentials: read the Hugging Face token from the\n# environment (e.g. a Kaggle secret). Defaults to \"\" like the original.\nHF_TOKEN = os.environ.get(\"HF_TOKEN\", \"\")\n\ntry:\n    # Attempt to push model to the hub (\"hf/model\" is a placeholder repo id)\n    model.push_to_hub(\"hf/model\", token=HF_TOKEN)\n    print(\"Model pushed to the hub successfully.\")\n\n    # Attempt to push tokenizer to the hub\n    tokenizer.push_to_hub(\"hf/model\", token=HF_TOKEN)\n    print(\"Tokenizer pushed to the hub successfully.\")\n\nexcept Exception as e:\n    # Print the exception if something goes wrong\n    print(f\"An error occurred: {e}\")", "metadata": {"trusted": true}, "outputs": [], "execution_count": null},
  {"cell_type": "markdown", "source": "### Instruction Finetuning\n", "metadata": {}},
  {"cell_type": "code", "source": "alpaca_prompt = \"\"\"Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\n{}\n\n### Input:\n{}\n\n### Response:\n{}\"\"\"\n\nEOS_TOKEN = tokenizer.eos_token  # Must add EOS_TOKEN\n\n# Named distinctly from formatting_pretrain_func above to avoid shadowing.\ndef formatting_finetune_func(examples):\n    \"\"\"Format (instruction, prompt, output) triples into Alpaca-style text.\"\"\"\n    instructions = examples[\"instruction\"]\n    inputs = examples[\"prompt\"]\n    outputs = examples[\"output\"]\n    texts = []\n    # 'input_text' avoids shadowing the builtin input()\n    for instruction, input_text, output in zip(instructions, inputs, outputs):\n        texts.append(alpaca_prompt.format(instruction, input_text, output) + EOS_TOKEN)\n    return { \"text\" : texts }\n\n\nfrom datasets import load_dataset\ndataset = load_dataset(\"parzi-parzi/patch-gen-dataset-v0.8.7\", split = \"train\")\ndataset = dataset.map(formatting_finetune_func, batched = True,)", "metadata": {"trusted": true}, "outputs": [], "execution_count": null},
  {"cell_type": "code", "source": "print(dataset[0])", "metadata": {"trusted": true}, "outputs": [], "execution_count": null},
  {"cell_type": "code", "source": "from transformers import TrainingArguments\nfrom unsloth import is_bfloat16_supported\nfrom unsloth import UnslothTrainer, UnslothTrainingArguments\n\ntrainer = UnslothTrainer(\n    model = model,\n    tokenizer = tokenizer,\n    train_dataset = dataset,\n    dataset_text_field = \"text\",\n    max_seq_length = max_seq_length,\n    dataset_num_proc = 8,\n\n    args = UnslothTrainingArguments(\n        per_device_train_batch_size = 2,\n        gradient_accumulation_steps = 8,\n        warmup_ratio = 0.1,\n        num_train_epochs = 2,\n        learning_rate = 5e-5,\n        embedding_learning_rate = 1e-5,\n\n        fp16 = not is_bfloat16_supported(),\n        bf16 = is_bfloat16_supported(),\n        logging_steps = 1,\n        optim = \"adamw_8bit\",\n        weight_decay = 0.00,\n        lr_scheduler_type = \"linear\",\n        seed = 3407,\n        output_dir = \"outputs\",\n    ),\n)", "metadata": {"trusted": true}, "outputs": [], "execution_count": null},
  {"cell_type": "code", "source": "trainer_stats = trainer.train()", "metadata": {"trusted": true}, "outputs": [], "execution_count": null},
  {"cell_type": "code", "source": "import os\n\nHF_TOKEN = os.environ.get(\"HF_TOKEN\", \"\")  # do not hardcode credentials\nmodel.push_to_hub(\"hf/model\", token = HF_TOKEN)\ntokenizer.push_to_hub(\"hf/model\", token = HF_TOKEN)", "metadata": {"trusted": true}, "outputs": [], "execution_count": null},
  {"cell_type": "code", "source": "#@title Show final memory and time stats\nused_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\nused_memory_for_lora = round(used_memory - start_gpu_memory, 3)\nused_percentage = round(used_memory / max_memory * 100, 3)\nlora_percentage = round(used_memory_for_lora / max_memory * 100, 3)\nprint(f\"{trainer_stats.metrics['train_runtime']} seconds used for training.\")\nprint(f\"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.\")\nprint(f\"Peak reserved memory = {used_memory} GB.\")\nprint(f\"Peak reserved memory for training = {used_memory_for_lora} GB.\")\nprint(f\"Peak reserved memory % of max memory = {used_percentage} %.\")\nprint(f\"Peak reserved memory for training % of max memory = {lora_percentage} %.\")", "metadata": {"trusted": true}, "outputs": [], "execution_count": null},
  {"cell_type": "markdown", "source": "### Inference\n", "metadata": {}},
  {"cell_type": "code", "source": "FastLanguageModel.for_inference(model)\ninputs = tokenizer(\n[\n    alpaca_prompt.format(\n        \"create a Pd patch that matches the following request.\", # instruction\n        \"Create a Pure Data patch that takes an integer and shifts its binary representation to the right by a specified number of places.\", # input\n        \"\", # output - leave this blank for generation!\n    )\n], return_tensors = \"pt\").to(\"cuda\")\n\nfrom transformers import TextStreamer\ntext_streamer = TextStreamer(tokenizer)\n_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 512)", "metadata": {"trusted": true}, "outputs": [], "execution_count": null},
  {"cell_type": "markdown", "source": "### GGUF / llama.cpp Conversion", "metadata": {}},
  {"cell_type": "code", "source": "import os\nHF_TOKEN = os.environ.get(\"HF_TOKEN\", \"\")  # do not hardcode credentials\n\n# Save to 8bit Q8_0\nif False: model.save_pretrained_gguf(\"model\", tokenizer,)\nif False: model.push_to_hub_gguf(\"hf/model\", tokenizer, token = HF_TOKEN)\n\n# Save to 16bit GGUF\nif False: model.save_pretrained_gguf(\"model\", tokenizer, quantization_method = \"f16\")\nif False: model.push_to_hub_gguf(\"hf/model\", tokenizer, quantization_method = \"f16\", token = HF_TOKEN)\n\n# Save to q4_k_m GGUF\nif False: model.save_pretrained_gguf(\"model\", tokenizer, quantization_method = \"q4_k_m\")\nif False: model.push_to_hub_gguf(\"hf/model\", tokenizer, quantization_method = \"q4_k_m\", token = HF_TOKEN)\nif False: model.push_to_hub_gguf(\"hf/model\", tokenizer, quantization_method = \"q5_k_m\", token = HF_TOKEN)", "metadata": {"trusted": true}, "outputs": [], "execution_count": null}
 ]
}