fix: resolve repeated read panic by pinning connection in goroutines

This commit is contained in:
Meta-Repo Bot 2026-01-21 04:47:47 +00:00
parent e5957cf182
commit e3c6d6c1fb
2 changed files with 39 additions and 20 deletions

27
WORKLOG.md Normal file
View file

@ -0,0 +1,27 @@
# MusicLink Worklog
## 2026-01-20
### Current Status
- **Backend:** Go (v1.22.8) bot using Matterbridge WebSocket API.
- **Deployment:** NixOS-based (flake.nix), currently managed on `ops-jrz1`.
- **Issues:**
1. **Crash Loop:** `musiclink.service` is failing with `panic: repeated read on failed websocket connection`.
2. **API Instability:** The `idonthavespotify` API is reportedly returning 500s.
3. **Environment:** `matterbridge.service` was not found on the local system (likely running elsewhere or under a different name).
### Actions Taken
1. **Investigation:**
- Verified `musiclink.service` status and read `bot-crash.log`.
- Confirmed the panic location in `internal/bot/bot.go:186`.
- Verified `idonthavespotify` API status: **Currently responsive (200 OK)** for specific Spotify album links. Intermittent 500s may still occur.
- Tested Odesli (song.link) API as a potential fallback.
2. **Analysis:**
- Identified that `readLoop` in `bot.go` needs to handle connection closures more gracefully (i.e., stop reading from a connection once a read on it has failed) to avoid the "repeated read" panic.
- Verified that secrets (Matterbridge token) are handled via `sops-nix` by the platform team.
### Next Steps
- Implement fix for WebSocket panic in `internal/bot/bot.go`.
- Finalize investigation into `idonthavespotify` 500 errors.
- If 500s persist, implement Odesli (song.link) as an alternative service provider.
- Coordinate with platform team regarding `matterbridge` service status.

View file

@ -126,6 +126,14 @@ func (b *Bot) Run(ctx context.Context) error {
// runLoop processes messages until the connection is lost or context is canceled. // runLoop processes messages until the connection is lost or context is canceled.
func (b *Bot) runLoop(ctx context.Context) error { func (b *Bot) runLoop(ctx context.Context) error {
b.mu.Lock()
conn := b.conn
b.mu.Unlock()
if conn == nil {
return fmt.Errorf("connection is nil")
}
// Create a sub-context for this specific connection's goroutines // Create a sub-context for this specific connection's goroutines
connCtx, cancel := context.WithCancel(ctx) connCtx, cancel := context.WithCancel(ctx)
defer cancel() defer cancel()
@ -135,11 +143,11 @@ func (b *Bot) runLoop(ctx context.Context) error {
// Start reader goroutine // Start reader goroutine
go func() { go func() {
readDone <- b.readLoop(connCtx) readDone <- b.readLoop(connCtx, conn)
}() }()
// Start ping goroutine // Start ping goroutine
go b.pingLoop(connCtx) go b.pingLoop(connCtx, conn)
// Process messages // Process messages
for { for {
@ -171,7 +179,7 @@ func (b *Bot) runLoop(ctx context.Context) error {
} }
// readLoop reads messages from the WebSocket connection. // readLoop reads messages from the WebSocket connection.
func (b *Bot) readLoop(ctx context.Context) error { func (b *Bot) readLoop(ctx context.Context, conn *websocket.Conn) error {
for { for {
select { select {
case <-ctx.Done(): case <-ctx.Done():
@ -179,14 +187,6 @@ func (b *Bot) readLoop(ctx context.Context) error {
default: default:
} }
b.mu.Lock()
conn := b.conn
b.mu.Unlock()
if conn == nil {
return fmt.Errorf("connection is nil")
}
_, data, err := conn.ReadMessage() _, data, err := conn.ReadMessage()
if err != nil { if err != nil {
if websocket.IsCloseError(err, websocket.CloseNormalClosure) { if websocket.IsCloseError(err, websocket.CloseNormalClosure) {
@ -212,7 +212,7 @@ func (b *Bot) readLoop(ctx context.Context) error {
} }
// pingLoop sends periodic pings to keep the connection alive. // pingLoop sends periodic pings to keep the connection alive.
func (b *Bot) pingLoop(ctx context.Context) { func (b *Bot) pingLoop(ctx context.Context, conn *websocket.Conn) {
ticker := time.NewTicker(30 * time.Second) ticker := time.NewTicker(30 * time.Second)
defer ticker.Stop() defer ticker.Stop()
@ -221,14 +221,6 @@ func (b *Bot) pingLoop(ctx context.Context) {
case <-ctx.Done(): case <-ctx.Done():
return return
case <-ticker.C: case <-ticker.C:
b.mu.Lock()
conn := b.conn
b.mu.Unlock()
if conn == nil {
return
}
conn.SetWriteDeadline(time.Now().Add(5 * time.Second)) conn.SetWriteDeadline(time.Now().Add(5 * time.Second))
if err := conn.WriteMessage(websocket.PingMessage, nil); err != nil { if err := conn.WriteMessage(websocket.PingMessage, nil); err != nil {
log.Printf("Ping failed: %v", err) log.Printf("Ping failed: %v", err)