diff --git a/README.md b/README.md index 4ecfaf0..7455c44 100644 --- a/README.md +++ b/README.md @@ -56,6 +56,8 @@ docker compose run --rm trainer python train.py --wake-word "hey cal" --data-dir Training takes 4-8 hours depending on GPU. +Don't be alarmed if the finished model is only a few hundred kilobytes. Test your model in the following section to see if it works correctly. + ### 5. Test Your Model Test on your host machine (needs microphone access): diff --git a/docker-compose.yml b/docker-compose.yml index 1bba6dc..a86fcc6 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,6 +1,7 @@ services: kokoro: image: ghcr.io/remsky/kokoro-fastapi-gpu:latest + ipc: "host" ports: - "8880:8880" deploy: @@ -13,6 +14,7 @@ services: trainer: build: . + ipc: "host" deploy: resources: reservations: diff --git a/requirements.txt b/requirements.txt index 2b0f773..80ad6d7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -20,6 +20,7 @@ acoustics==0.2.6 pronouncing==0.2.0 datasets==2.14.6 deep-phonemizer==0.0.19 +protobuf>=3.20.0,<4.0.0 # Data download/processing piper-phonemize diff --git a/setup-data.sh b/setup-data.sh index b5a5814..3274700 100755 --- a/setup-data.sh +++ b/setup-data.sh @@ -67,7 +67,7 @@ if [ ! -d "$DATA_DIR/audioset_16k" ]; then echo "Downloading AudioSet background audio..." mkdir -p "$DATA_DIR/audioset" "$DATA_DIR/audioset_16k" curl -L -o "$DATA_DIR/audioset/bal_train09.tar" \ - 'https://huggingface.co/datasets/agkphysics/AudioSet/resolve/main/bal_train09.tar' + 'https://huggingface.co/datasets/agkphysics/AudioSet/resolve/728fffe80088808b49db5aa700f2e264c035c880/data/bal_train09.tar' tar -xf "$DATA_DIR/audioset/bal_train09.tar" -C "$DATA_DIR/audioset" python3 << EOF