-- Last commit: 28529495, authored by Isabella Almeida da Silva.
-- Commit message: sandboxes - the pbs driver had not received the removal
-- patch; added a new check to avoid errors when removing sandboxes that were
-- already removed recursively.
local posix = {}
local signal = require("posix.signal")
local wait = require("posix.sys.wait")
local unistd = require("posix.unistd")
local stat = require("posix.sys.stat")
local procdata = require("procdata")
local safer = require("safer")
local util = require("sga.util")
local filemonitor = require("sga.filemonitor")
--- Build the path of a job's ".done" marker file.
-- @param self Driver instance; self.config.runtime_data_dir is read.
-- @param jid Job identifier used as the file's base name.
-- @return The string "<runtime_data_dir>/<jid>.done".
local function done_file(self, jid)
  local runtime_dir = self.config.runtime_data_dir
  local file_name = jid .. ".done"
  return runtime_dir .. "/" .. file_name
end
--- Build the path of a job's ".start" marker file.
-- @param self Driver instance; self.config.runtime_data_dir is read.
-- @param jid Job identifier used as the file's base name.
-- @return The string "<runtime_data_dir>/<jid>.start".
local function start_file(self, jid)
  local runtime_dir = self.config.runtime_data_dir
  local file_name = jid .. ".start"
  return runtime_dir .. "/" .. file_name
end
---
-- Execute a new command.
-- Creates the job's sandbox directories and forks. The child process writes
-- its own start time to the job's ".start" file, runs the command through
-- os.execute and then writes the elapsed wall time (in seconds) to the job's
-- ".done" file. The parent stores the child's PID in job.data.pid and returns
-- without waiting for the command.
-- @param job The job object: job.data is a writable table for driver data.
-- @param cmd_string The command string
-- @return True if succeeded or nil and an error message
function posix.execute_command(self, job, cmd_string)
  -- NOTE(review): the counter is incremented even when this function later
  -- returns nil -- confirm that callers compensate on failure.
  self.active_commands = self.active_commands + 1
  filemonitor.init(job)

  -- NOTE(review): `lfs` is not required in the visible part of this file;
  -- presumably it is available as a global -- confirm.
  for _, sandbox_path in ipairs(job.sandboxes) do
    local ok, err = lfs.mkdir(sandbox_path)
    if not ok then
      -- A failed mkdir is acceptable as long as the directory is there.
      local attr = lfs.attributes(sandbox_path)
      if not (attr and attr.mode == "directory") then
        return nil, "Failed creating job's sandbox "..sandbox_path
          ..": "..tostring(err)
      end
    end
  end

  local pid, errmsg = unistd.fork()
  if pid == nil then
    return nil, "Failed forking subprocess: "..tostring(errmsg)
  end

  if pid == 0 then
    -- FIXME In the forked process, just dump errors to stderr
    -- until we have a better procedure.
    local function dump(err)
      -- tostring() because pcall may hand back a non-string error value; an
      -- error raised here would keep the child from reaching os.exit below.
      io.stderr:write(tostring(err).."\n")
    end

    -- Close descriptors 3..65535, leaving only stdin/stdout/stderr open in
    -- that range (presumably so the child does not keep the parent's files).
    for fd = 3, 65535 do
      unistd.close(fd)
    end

    local ok, err = pcall(function()
      signal.signal(signal.SIGHUP, signal.SIG_IGN)
      local child_pid = unistd.getpid()
      -- assert directly on the call so a returned error message is kept.
      local pinfo = assert(procdata.get_process_info(child_pid))
      local written, werr = util.write_file(start_file(self, job.jid),
                                            pinfo.starttime)
      if not written then
        dump(werr)
        os.exit(0) -- don't run a command we can't monitor
      end

      local start_time = os.time()
      os.execute(cmd_string)
      local walltime_s = os.difftime(os.time(), start_time)

      written, werr = util.write_file(done_file(self, job.jid), walltime_s)
      if not written then
        dump(werr)
      end
    end)
    if not ok then
      dump(err)
    end
    -- The child must never return into the caller's code.
    os.exit(0)
  end

  -- Parent process: remember the child and report success.
  job.data.pid = pid
  self.logger:debug("Created PID "..pid.." for jid "..job.jid)
  return true
end
--- Tell whether the process started for a job still counts as running.
-- Returns false when no process information is available for the stored
-- PID, or when the process is a zombie ("Z") that wait() reaps for this PID.
-- In every other case it returns true. When the process is a zombie, its
-- process information is saved in job.data.pinfo before the wait() call.
-- @param self Driver instance (used to locate the job's ".start" file).
-- @param job The job object; job.data.pid and job.jid are read.
-- @return true if the command is considered alive, false otherwise.
local function is_command_alive(self, job)
  local child_pid = job.data.pid
  local info = procdata.get_process_info(child_pid)
  if not info then
    return false
  end

  -- The ".start" file is still read, but the pid-rotation check that used
  -- it is currently disabled:
  --   if pidstart and tonumber(pidstart) == tonumber(pinfo.starttime) then
  local recorded_start = util.read_file(start_file(self, job.jid))

  if info.state ~= "Z" then
    return true
  end

  -- Zombie: keep its process info, then try to reap it without blocking.
  job.data.pinfo = info
  local reaped_pid = wait.wait(child_pid, wait.WNOHANG)
  return reaped_pid ~= child_pid
end
local function collect_exec_data(self, job)
local script = job.parameters.csbase_command_path.."/collect_execution_data"
if not stat.stat(script) then
return pairs({})
end
return coroutine.wrap(function ()
local params = {
job.cmd_id,
job.sandboxes[1],
job.parameters.csbase_command_path,
job.parameters.csbase_command_output_path,
job.parameters.csbase_command_root_path
}
local collect_cmd = "ksh "..script.." "..table.concat(params," ")
self.logger:debug("Executing data collection script: "..collect_cmd)
local stdout = assert(io.popen(collect_cmd, 'r'))
for line in stdout:lines() do
for key, value in string.gmatch(line, "(.-)%s*=%s*([^,]+)") do
coroutine.yield(key, value)
end
end
stdout:close()
end)
-- local params = {
-- job.parameters.csbase_command_path,
-- job.cmd_id,
-- job.sandboxes[1],
-- job.parameters.csbase_command_output_path
-- }
-- local collect_cmd = "ksh "..script.." "..table.concat(params," ")
-- self.logger:debug("Executing data collection script: "..collect_cmd)