fix: resolve websocket panic and add NixOS module

- Fixed a panic in internal/bot/bot.go where a repeated read was attempted on a failed websocket connection.
- Removed the 5-second read deadline and timeout handling in readLoop.
- Added connection closure on ping failure to trigger quick reconnection.
- Added NixOS module to flake.nix for service deployment.
This commit is contained in:
Meta-Repo Bot 2026-01-20 23:46:14 +00:00
parent b6cf5fdfa7
commit e5957cf182
2 changed files with 68 additions and 29 deletions

View file

@ -39,5 +39,53 @@
];
};
}
);
) // {
# NixOS module that runs the MusicLink bot as a hardened systemd service.
#
# Options (under services.musiclink):
#   enable     - whether to create and start the musiclink systemd unit.
#   configFile - path to the config.toml handed to the binary via -config.
nixosModules.default = { config, lib, pkgs, ... }:
  let
    # Pull in only the lib functions used below instead of a blanket `with lib;`.
    inherit (lib) mkEnableOption mkIf mkOption types;

    cfg = config.services.musiclink;
  in
  {
    options.services.musiclink = {
      enable = mkEnableOption "MusicLink Bot";

      configFile = mkOption {
        # The option type is `types.path` (lowercase); `types.Path` is not an
        # attribute of lib.types and makes evaluation of the module fail.
        type = types.path;
        description = "Path to the config.toml file";
      };
    };

    config = mkIf cfg.enable {
      systemd.services.musiclink = {
        description = "MusicLink Bot";
        wantedBy = [ "multi-user.target" ];
        after = [ "network.target" "matterbridge.service" ];

        serviceConfig = {
          Type = "simple";
          # Use the package built by this flake for the host's platform.
          # stdenv.hostPlatform.system is used rather than the top-level
          # `pkgs.system` alias so the lookup does not depend on that alias.
          ExecStart = "${self.packages.${pkgs.stdenv.hostPlatform.system}.default}/bin/musiclink -config ${cfg.configFile}";
          # Always restart the bot, waiting 5 seconds between attempts.
          Restart = "always";
          RestartSec = "5s";

          # Hardening
          # NOTE(review): with DynamicUser the service runs as a transient
          # user, so cfg.configFile presumably has to be readable by that
          # user - confirm for deployments that keep secrets in the file.
          DynamicUser = true;
          StateDirectory = "musiclink";
          ProtectSystem = "strict";
          ProtectHome = true;
          NoNewPrivileges = true;
          ProtectKernelTunables = true;
          ProtectKernelModules = true;
          ProtectControlGroups = true;
          RestrictNamespaces = true;
          LockPersonality = true;
          MemoryDenyWriteExecute = true;
          RestrictRealtime = true;
          RestrictSUIDSGID = true;
          PrivateMounts = true;
          SystemCallFilter = [ "@system-service" "~@privileged" "~@resources" ];
        };
      };
    };
  };
};
}

View file

@ -110,28 +110,36 @@ func (b *Bot) Run(ctx context.Context) error {
// Run the message loop until disconnection
err := b.runLoop(ctx)
if err == context.Canceled {
return err
}
// Connection lost, will reconnect
log.Printf("Connection lost: %v (reconnecting...)", err)
if err != nil && err != context.Canceled {
log.Printf("Connection lost: %v (reconnecting...)", err)
}
b.closeConn()
if err == context.Canceled || (ctx.Err() != nil) {
return ctx.Err()
}
}
}
// runLoop processes messages until the connection is lost or context is canceled.
func (b *Bot) runLoop(ctx context.Context) error {
// Create a sub-context for this specific connection's goroutines
connCtx, cancel := context.WithCancel(ctx)
defer cancel()
// Channel to signal read loop exit
readDone := make(chan error, 1)
// Start reader goroutine
go func() {
readDone <- b.readLoop(ctx)
readDone <- b.readLoop(connCtx)
}()
// Start ping goroutine
go b.pingLoop(ctx)
go b.pingLoop(connCtx)
// Process messages
for {
@ -165,7 +173,6 @@ func (b *Bot) runLoop(ctx context.Context) error {
// readLoop reads messages from the WebSocket connection.
func (b *Bot) readLoop(ctx context.Context) error {
for {
// Check if context is done before blocking on read
select {
case <-ctx.Done():
return ctx.Err()
@ -177,21 +184,14 @@ func (b *Bot) readLoop(ctx context.Context) error {
b.mu.Unlock()
if conn == nil {
return fmt.Errorf("connection closed")
return fmt.Errorf("connection is nil")
}
// Set read deadline so we can check context periodically
conn.SetReadDeadline(time.Now().Add(5 * time.Second))
_, data, err := conn.ReadMessage()
if err != nil {
// Timeout is expected, check context and continue
if websocket.IsCloseError(err, websocket.CloseNormalClosure) {
return nil
}
if isTimeout(err) {
continue
}
return fmt.Errorf("read error: %w", err)
}
@ -203,6 +203,8 @@ func (b *Bot) readLoop(ctx context.Context) error {
select {
case b.messages <- msg:
case <-ctx.Done():
return ctx.Err()
default:
log.Printf("Message queue full, dropping message")
}
@ -230,6 +232,7 @@ func (b *Bot) pingLoop(ctx context.Context) {
conn.SetWriteDeadline(time.Now().Add(5 * time.Second))
if err := conn.WriteMessage(websocket.PingMessage, nil); err != nil {
log.Printf("Ping failed: %v", err)
b.closeConn()
return
}
}
@ -284,15 +287,3 @@ func (b *Bot) Close() error {
}
return nil
}
// isTimeout reports whether err describes a timeout.
//
// Only err itself is inspected (wrapped errors are not unwrapped): it must
// provide a Timeout() bool method, as net.Error values do, and that method's
// result is returned. A nil error, or one without such a method, yields false.
func isTimeout(err error) bool {
	type timeouter interface{ Timeout() bool }

	// A type assertion on a nil interface value simply fails, so nil needs
	// no separate guard here.
	if t, ok := err.(timeouter); ok {
		return t.Timeout()
	}
	return false
}