diff --git a/WORKLOG.md b/WORKLOG.md new file mode 100644 index 0000000..c10cc7b --- /dev/null +++ b/WORKLOG.md @@ -0,0 +1,27 @@ +# MusicLink Worklog + +## 2026-01-20 + +### Current Status +- **Backend:** Go (v1.22.8) bot using Matterbridge WebSocket API. +- **Deployment:** NixOS-based (flake.nix), currently managed on `ops-jrz1`. +- **Issues:** + 1. **Crash Loop:** `musiclink.service` is failing with `panic: repeated read on failed websocket connection`. + 2. **API Instability:** `idonthavespotify` API is reported to be returning 500s. + 3. **Environment:** `matterbridge.service` was not found on the local system (likely running elsewhere or under a different name). + +### Actions Taken +1. **Investigation:** + - Verified `musiclink.service` status and read `bot-crash.log`. + - Confirmed the panic location in `internal/bot/bot.go:186`. + - Verified `idonthavespotify` API status: **Currently responsive (200 OK)** for specific Spotify album links. Intermittent 500s may still occur. + - Tested Odesli (song.link) API as a potential fallback. +2. **Analysis:** + - Identified that `readLoop` in `bot.go` needs to handle connection closures more gracefully to avoid the "repeated read" panic. + - Verified that secrets (Matterbridge token) are handled via `sops-nix` by the platform team. + +### Next Steps +- Implement a fix for the WebSocket panic in `internal/bot/bot.go`. +- Finalize investigation into `idonthavespotify` 500 errors. +- If 500s persist, implement Odesli (song.link) as an alternative service provider. +- Coordinate with the platform team regarding `matterbridge` service status. diff --git a/internal/bot/bot.go b/internal/bot/bot.go index fa9f650..c13b77b 100644 --- a/internal/bot/bot.go +++ b/internal/bot/bot.go @@ -126,6 +126,14 @@ func (b *Bot) Run(ctx context.Context) error { // runLoop processes messages until the connection is lost or context is canceled. 
func (b *Bot) runLoop(ctx context.Context) error { + b.mu.Lock() + conn := b.conn + b.mu.Unlock() + + if conn == nil { + return fmt.Errorf("connection is nil") + } + // Create a sub-context for this specific connection's goroutines connCtx, cancel := context.WithCancel(ctx) defer cancel() @@ -135,11 +143,11 @@ func (b *Bot) runLoop(ctx context.Context) error { // Start reader goroutine go func() { - readDone <- b.readLoop(connCtx) + readDone <- b.readLoop(connCtx, conn) }() // Start ping goroutine - go b.pingLoop(connCtx) + go b.pingLoop(connCtx, conn) // Process messages for { @@ -171,7 +179,7 @@ func (b *Bot) runLoop(ctx context.Context) error { } // readLoop reads messages from the WebSocket connection. -func (b *Bot) readLoop(ctx context.Context) error { +func (b *Bot) readLoop(ctx context.Context, conn *websocket.Conn) error { for { select { case <-ctx.Done(): @@ -179,14 +187,6 @@ func (b *Bot) readLoop(ctx context.Context) error { default: } - b.mu.Lock() - conn := b.conn - b.mu.Unlock() - - if conn == nil { - return fmt.Errorf("connection is nil") - } - _, data, err := conn.ReadMessage() if err != nil { if websocket.IsCloseError(err, websocket.CloseNormalClosure) { @@ -212,7 +212,7 @@ func (b *Bot) readLoop(ctx context.Context) error { } // pingLoop sends periodic pings to keep the connection alive. -func (b *Bot) pingLoop(ctx context.Context) { +func (b *Bot) pingLoop(ctx context.Context, conn *websocket.Conn) { ticker := time.NewTicker(30 * time.Second) defer ticker.Stop() @@ -221,14 +221,6 @@ func (b *Bot) pingLoop(ctx context.Context) { case <-ctx.Done(): return case <-ticker.C: - b.mu.Lock() - conn := b.conn - b.mu.Unlock() - - if conn == nil { - return - } - conn.SetWriteDeadline(time.Now().Add(5 * time.Second)) if err := conn.WriteMessage(websocket.PingMessage, nil); err != nil { log.Printf("Ping failed: %v", err)