klotz: llama.cpp + self-hosted + github


  1. obtain the original LLaMA model weights and place them in ./models

    ls ./models
    65B 30B 13B 7B tokenizer_checklist.chk tokenizer.model
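
    Before converting, it may be worth verifying the download against the bundled checksum list. A minimal sketch, assuming tokenizer_checklist.chk is an md5sum-format checklist as in Meta's original release:

    (cd ./models && md5sum --check tokenizer_checklist.chk)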

    install Python dependencies

    python3 -m pip install -r requirements.txt
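
    To keep these dependencies isolated from system packages, a virtual environment can be created first; this is standard Python tooling, not specific to llama.cpp:

    python3 -m venv .venv
    source .venv/bin/activate
    python3 -m pip install -r requirements.txt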

    convert the 7B model to ggml FP16 format

    python3 convert.py models/7B/
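
    A successful conversion writes an FP16 model file alongside the original weights; a quick sanity check on its presence and size (the file name matches the input of the quantize step below):

    ls -lh ./models/7B/ggml-model-f16.bin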

    quantize the model to 4-bits (using q4_0 method)

    ./quantize ./models/7B/ggml-model-f16.bin ./models/7B/ggml-model-q4_0.bin q4_0
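
    q4_0 is the smallest and fastest of several quantization types; llama.cpp builds from this period also accepted, for example, q4_1 and q8_0, which trade larger files for higher fidelity. The same invocation with a different method name, as a sketch:

    ./quantize ./models/7B/ggml-model-f16.bin ./models/7B/ggml-model-q8_0.bin q8_0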

    run the inference

    ./main -m ./models/7B/ggml-model-q4_0.bin -n 128
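
    The -n 128 run above generates 128 tokens from an empty prompt. A prompt can be supplied with -p, and -i with -r enables interactive chat; the flags and the bundled prompt file below follow the llama.cpp README of the time:

    ./main -m ./models/7B/ggml-model-q4_0.bin -p "Building a website can be done in 10 simple steps:" -n 128

    ./main -m ./models/7B/ggml-model-q4_0.bin -n 256 --repeat_penalty 1.0 --color -i -r "User:" -f prompts/chat-with-bob.txt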

    2023-06-05 Tags: llama cpp, self-hosted, github by klotz


