CoreWorxLab · ayaadev · Mar 27, 2026 · Mar 28, 2026 · Mar 28, 2026 · Mar 28, 2026
diff --git a/README.md b/README.md
@@ -56,6 +56,8 @@ docker compose run --rm trainer python train.py --wake-word "hey cal" --data-dir
 
 Training takes 4-8 hours depending on GPU.
 
+Don't be alarmed if the finished model is only a few hundred kilobytes; that size is expected. Follow the next section to verify that the model works correctly.
+
 ### 5. Test Your Model
 
 Test on your host machine (needs microphone access):

diff --git a/docker-compose.yml b/docker-compose.yml
@@ -1,6 +1,7 @@
 services:
   kokoro:
     image: ghcr.io/remsky/kokoro-fastapi-gpu:latest
+    ipc: "host"
     ports:
       - "8880:8880"
     deploy:
@@ -13,6 +14,7 @@ services:
 
   trainer:
     build: .
+    ipc: "host"
     deploy:
       resources:
         reservations:

diff --git a/requirements.txt b/requirements.txt
@@ -20,6 +20,7 @@ acoustics==0.2.6
 pronouncing==0.2.0
 datasets==2.14.6
 deep-phonemizer==0.0.19
+protobuf>=3.20.0,<4.0.0
 
 # Data download/processing
 piper-phonemize

diff --git a/setup-data.sh b/setup-data.sh
@@ -67,7 +67,7 @@ if [ ! -d "$DATA_DIR/audioset_16k" ]; then
     echo "Downloading AudioSet background audio..."
     mkdir -p "$DATA_DIR/audioset" "$DATA_DIR/audioset_16k"
     curl -L -o "$DATA_DIR/audioset/bal_train09.tar" \
-        'https://huggingface.co/datasets/agkphysics/AudioSet/resolve/main/bal_train09.tar'
+        'https://huggingface.co/datasets/agkphysics/AudioSet/resolve/728fffe80088808b49db5aa700f2e264c035c880/data/bal_train09.tar'
     tar -xf "$DATA_DIR/audioset/bal_train09.tar" -C "$DATA_DIR/audioset"
 
     python3 << EOF