Благодаря некоторым указателям из @Jeroen Mostert я смог найти следующее решение. Это хранимая процедура, которую я периодически запускаю из задания агента, которое проверяет и исправляет сеансы, застрявшие в KILLED / ROLLBACK, которые были вызваны завершением сеанса, который был в середине выполнения задачи xp_cmdshell:
CREATE PROCEDURE [dbo].[sp_KillStuckSessions]
AS
BEGIN
SET NOCOUNT ON;
DECLARE @ErrorText varchar(4000)
--Find out if we have any Sessions stuck in a KILLED/ROLLBACK state:
IF NOT EXISTS ( SELECT a.spid
FROM sys.sysprocesses a WITH(NOLOCK)
INNER JOIN sys.dm_exec_sessions b WITH(NOLOCK) ON a.spid = b.session_id
WHERE a.cmd = 'KILLED/ROLLBACK'
AND DATEDIFF(minute,a.last_batch,GETDATE()) > 5)
--If we don't, skip to the end and do nothing.
GOTO Endpoint
--Find out what the stuck session actually is
DECLARE @StuckTaskName varchar(1000)
DECLARE @NumStuckJobs integer
SELECT @NumStuckJobs = COUNT(*) FROM (
SELECT (
SELECT text FROM sys.dm_exec_sql_text(a.sql_handle)) AS SqlCommand
FROM sys.sysprocesses a WITH(NOLOCK)
INNER JOIN sys.dm_exec_sessions b WITH(NOLOCK) ON a.spid = b.session_id
WHERE a.cmd = 'KILLED/ROLLBACK'
AND DATEDIFF(minute,a.last_batch,GETDATE()) > 5
) a
--If its a single xp_cmdshell process, we want to kill it
IF @NumStuckJobs <> 1
BEGIN
SET @ErrorText = 'More than 1 session is stuck in KILLED/ROLLBACK - KillStuckSessions cannot kill more than 1. Please remote into the server and fix.'
GOTO ErrorHandling
END
ELSE
SET @StuckTaskName = (SELECT (SELECT text FROM sys.dm_exec_sql_text(a.sql_handle)) AS SqlCommand
FROM sys.sysprocesses a WITH(NOLOCK)
INNER JOIN sys.dm_exec_sessions b WITH(NOLOCK) ON a.spid = b.session_id
WHERE a.cmd = 'KILLED/ROLLBACK')
--If its anything else, we want to raise an error message with custom error text.
IF @StuckTaskName <> 'xp_cmdshell'
BEGIN
SET @ErrorText = 'A stuck session was caused by something other than xp_cmdshell - KillStuckSessions cannot kill this process. Please remote into the server and fix.'
GOTO ErrorHandling
END
--If we get this far, we know that there's one stuck session and it is an xp_cmdshell task.
--Let's kill the task!
--Declare a bunch of variables including a table variable to store the output of our wmic commands
DECLARE @wmicdata table (ProcessID varchar(1000) NULL)
DECLARE @SQLServerPID integer
DECLARE @PIDTestA1 integer
DECLARE @PIDTestA2 integer
DECLARE @PIDTestB1 integer
DECLARE @PIDTestB2 integer
DECLARE @OrphanedPID integer
DECLARE @StuckProcess1 integer
DECLARE @StuckProcess2 integer
--First we run this to get the PID of sqlserver.exe, which is what will have spawned xp_cmdshell
INSERT @wmicdata
EXEC xp_cmdshell 'wmic process where (name="sqlservr.exe") get ProcessID'
SET @SQLServerPID = (
SELECT CAST(SUBSTRING(ProcessID, PATINDEX('%[0-9]%', ProcessID),PATINDEX('%[^0-9]%', SUBSTRING(ProcessID, PATINDEX('%[0-9]%', ProcessID), 8000) + 'X') -1) AS integer)
FROM @wmicdata
WHERE STUFF(ProcessID, 1, PATINDEX('%[0-9]%', ProcessID)-1, '') IS NOT NULL
AND ProcessID IS NOT NULL)
--Next, we run this twice to get the process ID of any stuck child processes of sql server.
DECLARE @cmdshellInput varchar(1000)
SET @cmdshellInput = 'wmic process where (ParentProcessId=' + CAST(@SQLServerPID AS varchar(10)) + ') get ProcessID'
--Run 1
DELETE @wmicdata
INSERT @wmicdata
EXEC xp_cmdshell @cmdshellInput
SET @PIDTestA1 = (
SELECT TOP 1 CAST(SUBSTRING(ProcessID, PATINDEX('%[0-9]%', ProcessID),PATINDEX('%[^0-9]%', SUBSTRING(ProcessID, PATINDEX('%[0-9]%', ProcessID), 8000) + 'X') -1) AS integer)
FROM @wmicdata
WHERE STUFF(ProcessID, 1, PATINDEX('%[0-9]%', ProcessID)-1, '') IS NOT NULL
AND ProcessID IS NOT NULL
ORDER BY ProcessID ASC)
SET @PIDTestA2 = (
SELECT TOP 1 CAST(SUBSTRING(ProcessID, PATINDEX('%[0-9]%', ProcessID),PATINDEX('%[^0-9]%', SUBSTRING(ProcessID, PATINDEX('%[0-9]%', ProcessID), 8000) + 'X') -1) AS integer)
FROM @wmicdata
WHERE STUFF(ProcessID, 1, PATINDEX('%[0-9]%', ProcessID)-1, '') IS NOT NULL
AND ProcessID IS NOT NULL
ORDER BY ProcessID DESC)
--Wait a second
WAITFOR DELAY '00:00:01'
--Run 2
DELETE @wmicdata
INSERT @wmicdata
EXEC xp_cmdshell @cmdshellInput
SET @PIDTestB1 = (
SELECT TOP 1 CAST(SUBSTRING(ProcessID, PATINDEX('%[0-9]%', ProcessID),PATINDEX('%[^0-9]%', SUBSTRING(ProcessID, PATINDEX('%[0-9]%', ProcessID), 8000) + 'X') -1) AS integer)
FROM @wmicdata
WHERE STUFF(ProcessID, 1, PATINDEX('%[0-9]%', ProcessID)-1, '') IS NOT NULL
AND ProcessID IS NOT NULL
ORDER BY ProcessID ASC)
SET @PIDTestB2 = (
SELECT TOP 1 CAST(SUBSTRING(ProcessID, PATINDEX('%[0-9]%', ProcessID),PATINDEX('%[^0-9]%', SUBSTRING(ProcessID, PATINDEX('%[0-9]%', ProcessID), 8000) + 'X') -1) AS integer)
FROM @wmicdata
WHERE STUFF(ProcessID, 1, PATINDEX('%[0-9]%', ProcessID)-1, '') IS NOT NULL
AND ProcessID IS NOT NULL
ORDER BY ProcessID DESC)
--The number that doesn't immediately change is our orphaned cmd.exe process.
--The number that does change is the wmic command that we're running to get the PID!
IF @PIDTestA1 = @PIDTestB1
SET @OrphanedPID = @PIDTestA1
IF @PIDTestA1 = @PIDTestB2
SET @OrphanedPID = @PIDTestA1
IF @PIDTestA2 = @PIDTestB1
SET @OrphanedPID = @PIDTestA2
IF @PIDTestA2 = @PIDTestB2
SET @OrphanedPID = @PIDTestA2
--If none of them matched, then we had a problem.
IF @OrphanedPID IS NULL
BEGIN
SET @ErrorText = 'A stuck session was caused by something other than xp_cmdshell - KillStuckSessions cannot kill this process. Please remote into the server and fix.'
GOTO ErrorHandling
END
--Next, run it again on the orhpaned cmd.exe process to find out the child processes that we want to kill
--There should be 2 tasks total which we want to kill, a conhost.exe and the actual executable which we invoked from xp_cmdshell (psftp.exe)
SET @cmdshellInput = 'wmic process where (ParentProcessId=' + CAST(@OrphanedPID AS varchar(10)) + ') get ProcessID'
DELETE @wmicdata
INSERT @wmicdata
EXEC xp_cmdshell @cmdshellInput
--Identify Stuck Process 1
SET @StuckProcess1 = (
SELECT TOP 1 CAST(SUBSTRING(ProcessID, PATINDEX('%[0-9]%', ProcessID),PATINDEX('%[^0-9]%', SUBSTRING(ProcessID, PATINDEX('%[0-9]%', ProcessID), 8000) + 'X') -1) AS integer)
FROM @wmicdata
WHERE STUFF(ProcessID, 1, PATINDEX('%[0-9]%', ProcessID)-1, '') IS NOT NULL
AND ProcessID IS NOT NULL
ORDER BY ProcessID ASC)
--Identify Stuck Process 2
SET @StuckProcess2 = (
SELECT TOP 1 CAST(SUBSTRING(ProcessID, PATINDEX('%[0-9]%', ProcessID),PATINDEX('%[^0-9]%', SUBSTRING(ProcessID, PATINDEX('%[0-9]%', ProcessID), 8000) + 'X') -1) AS integer)
FROM @wmicdata
WHERE STUFF(ProcessID, 1, PATINDEX('%[0-9]%', ProcessID)-1, '') IS NOT NULL
AND ProcessID IS NOT NULL
ORDER BY ProcessID DESC)
--Get the .exe names of the processes we're about to kill
DECLARE @exe1 AS varchar(1000)
DECLARE @exe2 AS varchar(1000)
SET @cmdshellInput = 'wmic process where (ParentProcessId=' + CAST(@OrphanedPID AS varchar(10)) + ') get Name'
DELETE @wmicdata
INSERT @wmicdata
EXEC xp_cmdshell @cmdshellInput
SET @exe1 = (SELECT TOP 1 ProcessID FROM @wmicdata WHERE ProcessID LIKE '%.%' ORDER BY ProcessID ASC)
SET @exe2 = (SELECT TOP 1 ProcessID FROM @wmicdata WHERE ProcessID LIKE '%.%' ORDER BY ProcessID DESC)
--Finally, run these to kill the orphaned child tasks
SET @cmdshellInput = 'taskkill /f /pid ' + CAST(@StuckProcess1 AS varchar(10))
EXEC xp_cmdshell @cmdshellInput
SET @cmdshellInput = 'taskkill /f /pid ' + CAST(@StuckProcess2 AS varchar(10))
EXEC xp_cmdshell @cmdshellInput
--Email to say a process was killed
DECLARE @EmailBody varchar(4000)
SET @EmailBody = 'A task causing an SQL session to be stuck in KILLED/ROLLBACK was automatically killed on the server.<P>The name(s) of the executables that were killed:<P>' + @exe1 + '<P>' + @exe2
EXEC Automation.dbo.sp_SendEmailExternally --This is another stored proc of mine that sends emails. You could swap this for sp_send_dbmail
@Recipients = 'your@email.here',
@Subject = 'Agent Job KillStuckSessions found and killed a stuck session!',
@HTMLBody = @EmailBody
--Finished! Now the killed/rollback session will be gone.
--Skip the ErrorHandling section
GOTO Endpoint
ErrorHandling:
--Since We're running from an Agent Job, using RAISERROR here will generate an Alert email, and the Agent Job history will contain the custom @ErrorText message.
RAISERROR (@ErrorText, 16, 1)
EndPoint:
END
GO