fix: resolve repeated read panic by pinning connection in goroutines
This commit is contained in:
parent
e5957cf182
commit
e3c6d6c1fb
27
WORKLOG.md
Normal file
27
WORKLOG.md
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
# MusicLink Worklog
|
||||
|
||||
## 2026-01-20
|
||||
|
||||
### Current Status
|
||||
- **Backend:** Go (v1.22.8) bot using Matterbridge WebSocket API.
|
||||
- **Deployment:** NixOS-based (`flake.nix`), currently managed on `ops-jrz1`.
|
||||
- **Issues:**
|
||||
1. **Crash Loop:** `musiclink.service` is failing with `panic: repeated read on failed websocket connection`.
|
||||
2. **API Instability:** The `idonthavespotify` API is reported to be returning HTTP 500 errors.
|
||||
3. **Environment:** `matterbridge.service` was not found on the local system (likely running elsewhere or under a different name).
|
||||
|
||||
### Actions Taken
|
||||
1. **Investigation:**
|
||||
- Verified `musiclink.service` status and read `bot-crash.log`.
|
||||
- Confirmed the panic location in `internal/bot/bot.go:186`.
|
||||
- Verified `idonthavespotify` API status: **Currently responsive (200 OK)** for specific Spotify album links. Intermittent 500s may still occur.
|
||||
- Tested Odesli (song.link) API as a potential fallback.
|
||||
2. **Analysis:**
|
||||
- Identified that `readLoop` in `bot.go` needs to handle connection closures more gracefully to avoid the "repeated read" panic.
|
||||
- Verified that secrets (Matterbridge token) are handled via `sops-nix` by the platform team.
|
||||
|
||||
### Next Steps
|
||||
- Implement fix for WebSocket panic in `internal/bot/bot.go`.
|
||||
- Finalize investigation into `idonthavespotify` 500 errors.
|
||||
- If 500s persist, implement Odesli (song.link) as an alternative service provider.
|
||||
- Coordinate with platform team regarding `matterbridge` service status.
|
||||
|
|
@ -126,6 +126,14 @@ func (b *Bot) Run(ctx context.Context) error {
|
|||
|
||||
// runLoop processes messages until the connection is lost or context is canceled.
|
||||
func (b *Bot) runLoop(ctx context.Context) error {
|
||||
b.mu.Lock()
|
||||
conn := b.conn
|
||||
b.mu.Unlock()
|
||||
|
||||
if conn == nil {
|
||||
return fmt.Errorf("connection is nil")
|
||||
}
|
||||
|
||||
// Create a sub-context for this specific connection's goroutines
|
||||
connCtx, cancel := context.WithCancel(ctx)
|
||||
defer cancel()
|
||||
|
|
@ -135,11 +143,11 @@ func (b *Bot) runLoop(ctx context.Context) error {
|
|||
|
||||
// Start reader goroutine
|
||||
go func() {
|
||||
readDone <- b.readLoop(connCtx)
|
||||
readDone <- b.readLoop(connCtx, conn)
|
||||
}()
|
||||
|
||||
// Start ping goroutine
|
||||
go b.pingLoop(connCtx)
|
||||
go b.pingLoop(connCtx, conn)
|
||||
|
||||
// Process messages
|
||||
for {
|
||||
|
|
@ -171,7 +179,7 @@ func (b *Bot) runLoop(ctx context.Context) error {
|
|||
}
|
||||
|
||||
// readLoop reads messages from the WebSocket connection.
|
||||
func (b *Bot) readLoop(ctx context.Context) error {
|
||||
func (b *Bot) readLoop(ctx context.Context, conn *websocket.Conn) error {
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
|
|
@ -179,14 +187,6 @@ func (b *Bot) readLoop(ctx context.Context) error {
|
|||
default:
|
||||
}
|
||||
|
||||
b.mu.Lock()
|
||||
conn := b.conn
|
||||
b.mu.Unlock()
|
||||
|
||||
if conn == nil {
|
||||
return fmt.Errorf("connection is nil")
|
||||
}
|
||||
|
||||
_, data, err := conn.ReadMessage()
|
||||
if err != nil {
|
||||
if websocket.IsCloseError(err, websocket.CloseNormalClosure) {
|
||||
|
|
@ -212,7 +212,7 @@ func (b *Bot) readLoop(ctx context.Context) error {
|
|||
}
|
||||
|
||||
// pingLoop sends periodic pings to keep the connection alive.
|
||||
func (b *Bot) pingLoop(ctx context.Context) {
|
||||
func (b *Bot) pingLoop(ctx context.Context, conn *websocket.Conn) {
|
||||
ticker := time.NewTicker(30 * time.Second)
|
||||
defer ticker.Stop()
|
||||
|
||||
|
|
@ -221,14 +221,6 @@ func (b *Bot) pingLoop(ctx context.Context) {
|
|||
case <-ctx.Done():
|
||||
return
|
||||
case <-ticker.C:
|
||||
b.mu.Lock()
|
||||
conn := b.conn
|
||||
b.mu.Unlock()
|
||||
|
||||
if conn == nil {
|
||||
return
|
||||
}
|
||||
|
||||
conn.SetWriteDeadline(time.Now().Add(5 * time.Second))
|
||||
if err := conn.WriteMessage(websocket.PingMessage, nil); err != nil {
|
||||
log.Printf("Ping failed: %v", err)
|
||||
|
|
|
|||
Loading…
Reference in a new issue