diff --git a/README.md b/README.md index 2c87c330e..6a248987e 100644 --- a/README.md +++ b/README.md @@ -35,6 +35,13 @@ pip3 install git+https://github.com/huggingface/transformers@c612628045822f90902 ### Method 2: From source +0. Create a virtual environment. This was tested on Ubuntu 18.04 with Python 3.9.5 but should work in other Ubuntu/Python combinations. + +```bash +python3.9 -m venv ~/.venv/fastchat +. ~/.venv/fastchat/bin/activate +``` + 1. Clone this repository and navigate to FastChat folder ```bash git clone https://github.com/lm-sys/FastChat.git @@ -68,9 +75,31 @@ python3 -m fastchat.model.apply_delta \ --delta lmsys/vicuna-13b-delta-v0 ``` +Alternatively, just run `python download-model.py anon8231489123/vicuna-13b-GPTQ-4bit-128g` to get the precomputed weights. This is copied from thisserand's Google Colab sheet. + ### Vicuna-7B Coming soon. +## Install CUDA + +For example on Ubuntu 18.04: +```bash +wget https://developer.download.nvidia.com/compute/cuda/12.1.1/local_installers/cuda_12.1.1_530.30.02_linux.run +chmod +x cuda_12.1.1_530.30.02_linux.run +sudo sh cuda_12.1.1_530.30.02_linux.run +``` + +## Install GPTQ-for-LLaMa + +```bash +cd FastChat +mkdir repositories +cd repositories +git clone https://github.com/oobabooga/GPTQ-for-LLaMa.git -b cuda +cd GPTQ-for-LLaMa +python setup_cuda.py install +cd ../.. 
+ ## Serving ### Command Line Interface diff --git a/download-model.py b/download-model.py index 550d4af2e..e483eb033 100644 --- a/download-model.py +++ b/download-model.py @@ -108,7 +108,8 @@ def select_model_from_default_options(): def get_download_links_from_huggingface(model, branch): base = "https://huggingface.co" - page = f"/api/models/{model}/tree/{branch}?cursor=" + # Fix from https://github.com/oobabooga/text-generation-webui/pull/1373 + page = f"/api/models/{model}/tree/{branch}" cursor = b"" links = [] @@ -120,7 +121,12 @@ def get_download_links_from_huggingface(model, branch): has_safetensors = False is_lora = False while True: - content = requests.get(f"{base}{page}{cursor.decode()}").content + # Fix from https://github.com/oobabooga/text-generation-webui/pull/1373 + #content = requests.get(f"{base}{page}{cursor.decode()}").content + url = f"{base}{page}" + (f"?cursor={cursor.decode()}" if cursor else "") + r = requests.get(url) + r.raise_for_status() + content = r.content dict = json.loads(content) if len(dict) == 0: @@ -247,4 +253,4 @@ def download_files(file_list, output_folder, num_threads=8): # Downloading the files print(f"Downloading the model to {output_folder}") - download_files(links, output_folder, args.threads) \ No newline at end of file + download_files(links, output_folder, args.threads)