29 changes: 29 additions & 0 deletions README.md
@@ -35,6 +35,13 @@ pip3 install git+https://github.com/huggingface/transformers@c612628045822f90902

### Method 2: From source

0. Create a virtual environment. This was tested on Ubuntu 18.04 with Python 3.9.5 but should work with other Ubuntu/Python combinations.

```bash
python3.9 -m venv ~/.venv/fastchat
. ~/.venv/fastchat/bin/activate
```

1. Clone this repository and navigate to FastChat folder
```bash
git clone https://github.com/lm-sys/FastChat.git
@@ -68,9 +75,31 @@ python3 -m fastchat.model.apply_delta \
--delta lmsys/vicuna-13b-delta-v0
```

Alternatively, run `python download-model.py anon8231489123/vicuna-13b-GPTQ-4bit-128g` to fetch the precomputed quantized weights. This step is adapted from thisserand's Google Colab sheet.

### Vicuna-7B
Coming soon.

## Install CUDA

For example, on Ubuntu 18.04:
```bash
wget https://developer.download.nvidia.com/compute/cuda/12.1.1/local_installers/cuda_12.1.1_530.30.02_linux.run
chmod +x cuda_12.1.1_530.30.02_linux.run
sudo sh cuda_12.1.1_530.30.02_linux.run
```
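
After the installer finishes, `nvcc` and the CUDA libraries are usually not on the shell path yet. A minimal sketch, assuming the runfile used its default install prefix of `/usr/local/cuda-12.1`:

```shell
# Assumes the default install prefix /usr/local/cuda-12.1 (adjust if you
# chose a different location in the installer).
export PATH=/usr/local/cuda-12.1/bin:$PATH
export LD_LIBRARY_PATH=/usr/local/cuda-12.1/lib64:$LD_LIBRARY_PATH
```

Add these lines to `~/.bashrc` to make them persistent, and verify the toolkit with `nvcc --version`.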

## Install GPTQ-for-LLaMa

```bash
cd FastChat
mkdir repositories
cd repositories
git clone https://github.com/oobabooga/GPTQ-for-LLaMa.git -b cuda
cd GPTQ-for-LLaMa
python setup_cuda.py install
cd ../..
```

## Serving

### Command Line Interface
12 changes: 9 additions & 3 deletions download-model.py
@@ -108,7 +108,8 @@ def select_model_from_default_options():

def get_download_links_from_huggingface(model, branch):
base = "https://huggingface.co"
page = f"/api/models/{model}/tree/{branch}?cursor="
# Fix from https://github.com/oobabooga/text-generation-webui/pull/1373
page = f"/api/models/{model}/tree/{branch}"
cursor = b""

links = []
@@ -120,7 +121,12 @@ def get_download_links_from_huggingface(model, branch):
has_safetensors = False
is_lora = False
while True:
content = requests.get(f"{base}{page}{cursor.decode()}").content
# Fix from https://github.com/oobabooga/text-generation-webui/pull/1373
#content = requests.get(f"{base}{page}{cursor.decode()}").content
url = f"{base}{page}" + (f"?cursor={cursor.decode()}" if cursor else "")
r = requests.get(url)
r.raise_for_status()
content = r.content

dict = json.loads(content)
if len(dict) == 0:
@@ -247,4 +253,4 @@ def download_files(file_list, output_folder, num_threads=8):

# Downloading the files
print(f"Downloading the model to {output_folder}")
download_files(links, output_folder, args.threads)
download_files(links, output_folder, args.threads)
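
The cursor fix in `get_download_links_from_huggingface` boils down to not sending an empty `?cursor=` query string on the first request to the Hugging Face tree API. The same logic can be sketched as a small standalone helper (`build_tree_url` is a hypothetical name, not part of the script):

```python
def build_tree_url(model: str, branch: str, cursor: bytes = b"") -> str:
    """Build the Hugging Face tree-listing URL, omitting an empty cursor."""
    base = "https://huggingface.co"
    page = f"/api/models/{model}/tree/{branch}"
    # Only attach the cursor query parameter once pagination has started;
    # the first request goes out with no query string at all.
    return f"{base}{page}" + (f"?cursor={cursor.decode()}" if cursor else "")
```

The download loop then calls this on every iteration, updating `cursor` from the last entry of each page until the API returns an empty list.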