diff --git a/SKILL.md b/SKILL.md
index 859fd2f..1a27f24 100644
--- a/SKILL.md
+++ b/SKILL.md
@@ -12,11 +12,12 @@
 
 ## Pending (not started)
 
-- [ ] Test end-to-end: grant accessibility, press ⌥/, confirm response plays and logs
-- [ ] Verify ⌥/ hotkey fires correctly (pynput vk=44 + alt modifier)
-- [ ] Set up launch agent so FLINCH starts on login (removes Terminal dependency)
-- [ ] Add `flinch` alias to `~/.zshrc` as interim run command
-- [ ] Re-grant accessibility permission to launch agent process (after launch agent set up)
+- [x] Test end-to-end: confirmed working (2026-05-31)
+- [x] Hotkey working — switched from pynput to NSEvent global monitor, trigger is right ⌘ hold-to-record
+- [x] LLM switched to OpenAI gpt-4o-mini (key from macOS Keychain: service=OPENAI_API_KEY, account=mars)
+- [x] Launch agent created at `~/Library/LaunchAgents/com.aditya.flinch.plist` (2026-05-31)
+- [ ] ~~Add `flinch` alias~~ — not needed, launch agent handles startup
+- [ ] Fix Keychain permission prompts — macOS prompts on launch agent startup; solution is "Always Allow" in Keychain Access or grant ACL to venv Python binary
 
 ## Run command (current)
 
@@ -33,4 +34,7 @@ python /Users/aditya/Documents/Projects/flinch/flinch.py
 - keep_alive: default 5 min — user talks back-to-back, unloading between turns kills flow
 - Log is for user only — duck stays stateless, no cross-session memory fed to LLM
 - ElevenLabs key in macOS Keychain, not `.env`
+- OpenAI key in macOS Keychain: service=`OPENAI_API_KEY`, account=`mars` — default LLM is gpt-4o-mini
+- Hotkey changed from ⌥/ (pynput, broken on macOS) to right ⌘ via NSEvent global monitor
+- Menu icon changed from 🦆 to ⚡; dock icon hidden via `NSApplicationActivationPolicyAccessory`
 - Venv in `/Users/aditya/venvs/flinch` (consistent with all other venvs)
diff --git a/flinch.py b/flinch.py
index fa55c31..0f032eb 100644
--- a/flinch.py
+++ b/flinch.py
@@ -15,18 +15,19 @@
 from faster_whisper import WhisperModel
 import ollama
 from elevenlabs.client import ElevenLabs
-from pynput import keyboard
+from AppKit import NSEvent, NSApp, NSApplicationActivationPolicyAccessory
 from dotenv import load_dotenv
 
 load_dotenv()
 
 VOICE_ID = os.getenv("ELEVEN_VOICE_ID", "gj74dvtipVOXMFculyU6")
 ELEVEN_KEY = keyring.get_password("flinch", "eleven_api_key") or os.getenv("ELEVEN_API_KEY")
+OPENAI_KEY = keyring.get_password("OPENAI_API_KEY", "mars") or os.getenv("OPENAI_API_KEY")
 LOG_DIR = Path(os.path.expanduser(
     os.getenv("FLINCH_LOG_DIR", "~/Documents/obsidian/tyler/flinch")
 ))
 SAMPLE_RATE = 16000
-LLM_PROVIDER = os.getenv("LLM_PROVIDER", "ollama")
+LLM_PROVIDER = os.getenv("LLM_PROVIDER", "openai")
 OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "gemma3:4b")
 OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gpt-4o-mini")
 
@@ -47,6 +48,15 @@
 _eleven_client = ElevenLabs(api_key=ELEVEN_KEY)
 
 
+def _ensure_ollama():
+    try:
+        ollama.list()
+    except Exception:
+        subprocess.Popen(["ollama", "serve"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+        import time
+        time.sleep(2)
+
+
 def _get_whisper() -> WhisperModel:
     global _whisper_model
     if _whisper_model is None:
@@ -57,7 +67,7 @@ def _get_whisper() -> WhisperModel:
 def _ask_llm(transcript: str) -> str:
     if LLM_PROVIDER == "openai":
         from openai import OpenAI
-        client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+        client = OpenAI(api_key=OPENAI_KEY)
         resp = client.chat.completions.create(
             model=OPENAI_MODEL,
             messages=[
@@ -102,32 +112,34 @@ def _log(transcript: str, question: str):
 
 class FlinchApp(rumps.App):
     def __init__(self):
-        super().__init__("🦆", quit_button="Quit FLINCH")
+        super().__init__("⚡", quit_button="Quit FLINCH")
+        NSApp.setActivationPolicy_(NSApplicationActivationPolicyAccessory)
         self.recording = False
         self.frames = []
         self.stream = None
-        self._alt_held = False
+        _ensure_ollama()
         self._start_hotkey_listener()
 
     def _start_hotkey_listener(self):
-        def on_press(key):
-            if key in (keyboard.Key.alt, keyboard.Key.alt_l, keyboard.Key.alt_r):
-                self._alt_held = True
-            elif self._alt_held and not self.recording:
-                vk = getattr(key, "vk", None)
-                char = getattr(key, "char", None)
-                if vk == 44 or char in ("/", "÷"):
+        # Right Command = keyCode 54, Right Shift = keyCode 60
+        # NSFlagsChangedMask = 1 << 12; NSCommandKeyMask = 1 << 20
+        TRIGGER_KEYCODE = 54
+        NSFlagsChangedMask = 1 << 12
+        NSCommandKeyMask = 1 << 20
+
+        def handler(event):
+            if event.keyCode() != TRIGGER_KEYCODE:
+                return
+            if event.modifierFlags() & NSCommandKeyMask:
+                if not self.recording:
                     self._start_recording()
-
-        def on_release(key):
-            if key in (keyboard.Key.alt, keyboard.Key.alt_l, keyboard.Key.alt_r):
-                self._alt_held = False
+            else:
                 if self.recording:
                     self._stop_and_process()
 
-        listener = keyboard.Listener(on_press=on_press, on_release=on_release)
-        listener.daemon = True
-        listener.start()
+        self._monitor = NSEvent.addGlobalMonitorForEventsMatchingMask_handler_(
+            NSFlagsChangedMask, handler
+        )
 
     def _audio_callback(self, indata, frames_count, time_info, status):
         self.frames.append(indata.copy())
@@ -181,7 +193,7 @@ def _process(self):
         except Exception as e:
             print(f"flinch error: {e}")
         finally:
-            self.title = "🦆"
+            self.title = "⚡"
 
 
 if __name__ == "__main__":