From 95b35a3f00579707e136d0de1b13b52e97037146 Mon Sep 17 00:00:00 2001 From: Aya <220561865+ayaadev@users.noreply.github.com> Date: Fri, 27 Mar 2026 11:14:10 +0000 Subject: [PATCH 1/4] Fix outdated AudioSet link Pin the link to a specific revision of the HuggingFace repository. --- setup-data.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup-data.sh b/setup-data.sh index b5a5814..3274700 100755 --- a/setup-data.sh +++ b/setup-data.sh @@ -67,7 +67,7 @@ if [ ! -d "$DATA_DIR/audioset_16k" ]; then echo "Downloading AudioSet background audio..." mkdir -p "$DATA_DIR/audioset" "$DATA_DIR/audioset_16k" curl -L -o "$DATA_DIR/audioset/bal_train09.tar" \ - 'https://huggingface.co/datasets/agkphysics/AudioSet/resolve/main/bal_train09.tar' + 'https://huggingface.co/datasets/agkphysics/AudioSet/resolve/728fffe80088808b49db5aa700f2e264c035c880/data/bal_train09.tar' tar -xf "$DATA_DIR/audioset/bal_train09.tar" -C "$DATA_DIR/audioset" python3 << EOF From 3fb4f755951971f8bb4eecbb067f8fe3eabf6788 Mon Sep 17 00:00:00 2001 From: Aya <220561865+ayaadev@users.noreply.github.com> Date: Sat, 28 Mar 2026 08:03:08 +0000 Subject: [PATCH 2/4] Set IPC to host in Docker compose This fixes an issue where the container runs out of memory. --- docker-compose.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker-compose.yml b/docker-compose.yml index 1bba6dc..a86fcc6 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,6 +1,7 @@ services: kokoro: image: ghcr.io/remsky/kokoro-fastapi-gpu:latest + ipc: "host" ports: - "8880:8880" deploy: @@ -13,6 +14,7 @@ services: trainer: build: . + ipc: "host" deploy: resources: reservations: From 9986f5e7f71d90ca468cfa731c7683048961c030 Mon Sep 17 00:00:00 2001 From: Aya <220561865+ayaadev@users.noreply.github.com> Date: Sat, 28 Mar 2026 08:04:30 +0000 Subject: [PATCH 3/4] Pin protobuf package to 3.20.x This fixes the following error. Workaround 1 from the error message (downgrading protobuf) was used. TypeError: Descriptors cannot be created directly.
If this call came from a _pb2.py file, your generated code is out of date and must be regenerated with protoc >= 3.19.0. If you cannot immediately regenerate your protos, some other possible workarounds are: 1. Downgrade the protobuf package to 3.20.x or lower. 2. Set PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python (but this will use pure-Python parsing and will be much slower). --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 2b0f773..80ad6d7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -20,6 +20,7 @@ acoustics==0.2.6 pronouncing==0.2.0 datasets==2.14.6 deep-phonemizer==0.0.19 +protobuf>=3.20.0,<4.0.0 # Data download/processing piper-phonemize From 7eb86273e1e02d411646443cfa0d46efbb23cfd2 Mon Sep 17 00:00:00 2001 From: Aya <220561865+ayaadev@users.noreply.github.com> Date: Sat, 28 Mar 2026 08:11:13 +0000 Subject: [PATCH 4/4] Inform the user that the finished model could be small --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 4ecfaf0..7455c44 100644 --- a/README.md +++ b/README.md @@ -56,6 +56,8 @@ docker compose run --rm trainer python train.py --wake-word "hey cal" --data-dir Training takes 4-8 hours depending on GPU. +Don't be alarmed if the finished model is only a few hundred kilobytes. Test your model in the following section to see if it works correctly. + ### 5. Test Your Model Test on your host machine (needs microphone access):