HEX
Server: Apache/2
System: Linux nexus-01 4.18.0-553.120.1.el8_10.x86_64 #1 SMP Mon Apr 20 18:04:27 EDT 2026 x86_64
User: aglcoke (1118)
PHP: 8.2.31
Disabled: mail,exec,system,passthru,shell_exec,proc_close,proc_open,dl,popen,show_source,posix_kill,posix_mkfifo,posix_getpwuid,posix_setpgid,posix_setsid,posix_setuid,posix_setgid,posix_seteuid,posix_setegid,posix_uname
Upload Files
File: //proc/1/task/1/root/usr/share/rspamd/lualib/lua_scanners/cloudmark.lua
--[[
Copyright (c) 2021, Alexander Moisseev <moiseev@mezonplus.ru>

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
]]--

--[[[
-- @module cloudmark
-- This module contains Cloudmark v2 interface
--]]

local lua_util = require "lua_util"
local http = require "rspamd_http"
local upstream_list = require "rspamd_upstream_list"
local rspamd_logger = require "rspamd_logger"
local ucl = require "ucl"
local rspamd_util = require "rspamd_util"
local common = require "lua_scanners/common"
local fun = require "fun"
local lua_mime = require "lua_mime"

-- Module identifier: used as the default rule name, as the debug module
-- name for lua_util.debugm and as the exported `name` of this scanner
local N = 'cloudmark'
-- Boundary for multipart transfers, generated on module init
-- (one random 32-character hex value shared by every request of this module)
local static_boundary = rspamd_util.random_hex(32)

--- Build the full request URL for a Cloudmark upstream address.
-- @param rule scanner rule; `use_https`, `default_port` and `url` are read
-- @param addr upstream address; must provide `get_port()` and be convertible
--   with `tostring`
-- @param maybe_url optional request path; `rule.url` is used when omitted
-- @return URL string of the form `scheme://host:port<path>`
local function cloudmark_url(rule, addr, maybe_url)
  local path = maybe_url or rule.url

  local port = addr:get_port()
  if port == 0 then
    -- A zero port is treated as "not set": fall back to the rule default
    port = rule.default_port
  end

  local scheme = rule.use_https and 'https' or 'http'

  return string.format('%s://%s:%d%s', scheme, tostring(addr), port, path)
end

--- Detect cloudmark max size.
-- Preload hook: queries `/score/v2/max-message-size` on one upstream and, on
-- success, stores the reported `maxMessageSize` in `rule.max_size`.
-- Every failure is logged and leaves `rule.max_size` untouched.
-- @param rule scanner rule; `upstreams` is read, `max_size` is written
-- @param cfg configuration object handed to the HTTP request and debug log
-- @param ev_base event base handed to the HTTP request; also used as the
--   logging context
local function cloudmark_preload(rule, cfg, ev_base, _)
  local upstream = rule.upstreams:get_upstream_round_robin()
  if not upstream then
    -- NOTE(review): the message promises a retry, but no retry logic is
    -- visible in this function - confirm where (if anywhere) it happens
    rspamd_logger.errx(ev_base or rspamd_config,
        'cloudmark preload: no upstream available, will retry on next scan')
    return
  end
  local addr = upstream:get_addr()

  -- HTTP completion callback for the max-message-size query
  local function max_message_size_cb(http_err, code, body, _)
    if http_err then
      rspamd_logger.errx(ev_base, 'HTTP error when getting max message size: %s',
          http_err)
      return
    end
    if code ~= 200 then
      rspamd_logger.errx(ev_base, 'bad HTTP code when getting max message size: %s', code)
      -- Do not try to interpret the body of an error response: it would
      -- only produce a second, misleading error (or a bogus size)
      return
    end
    local parser = ucl.parser()
    local ret, err = parser:parse_string(body)
    if not ret then
      rspamd_logger.errx(ev_base, 'could not parse response body [%s]: %s', body, err)
      return
    end
    local obj = parser:get_object()
    local ms = obj.maxMessageSize
    if not ms then
      rspamd_logger.errx(ev_base, 'missing maxMessageSize in the response body (JSON): %s', obj)
      return
    end

    rule.max_size = ms
    lua_util.debugm(N, cfg, 'set maximum message size set to %s bytes', ms)
  end

  http.request({
    ev_base = ev_base,
    config = cfg,
    url = cloudmark_url(rule, addr, '/score/v2/max-message-size'),
    callback = max_message_size_cb,
  })
end

--- Convert numeric-looking string keys of a table to number keys, in place.
-- Keys that are already numbers, and string keys that do not parse as a
-- number, are left untouched.
-- @param d table to normalise (modified in place)
local function numerify(d)
  -- Snapshot the keys first: new keys must not be added to a table while it
  -- is being traversed with pairs
  local keys = {}
  for k in pairs(d) do
    keys[#keys + 1] = k
  end

  for _, k in ipairs(keys) do
    -- Only strings need converting. For a key that is already a number,
    -- tonumber(k) == k, so "move then clear" would delete the entry.
    if type(k) == 'string' then
      local new_key = tonumber(k)
      -- new_key == new_key filters out NaN, which cannot be a table key
      if new_key and new_key == new_key then
        d[new_key] = d[k]
        d[k] = nil
      end
    end
  end
end

--- Build a Cloudmark scanner rule from user supplied options.
-- Defaults are overridden by `opts`; derived fields (`prefix`, `log_prefix`,
-- `servers`, `upstreams`, `symbols`, `preloads`) are then filled in.
-- @param opts user options; `opts.type` is read when building `log_prefix`
-- @return the rule table, or nil when no servers are defined or the server
--   list cannot be turned into an upstream list
local function cloudmark_config(opts)
  local defaults = {
    name = N,
    default_port = 2713,
    url = '/score/v2/message',
    use_https = false,
    timeout = 5.0,
    log_clean = false,
    retransmits = 1,
    score_threshold = 90, -- minimum score for a reply to be considered
    message = '${SCANNER}: spam message found: "${VIRUS}"',
    max_message = 0,
    detection_category = "hash",
    default_score = 1,
    action = false,
    log_spamcause = true,
    symbol_fail = 'CLOUDMARK_FAIL',
    symbol = 'CLOUDMARK_CHECK',
    symbol_spam = 'CLOUDMARK_SPAM',
    add_score_header = false, -- add the X-CMAE-Score header
    add_headers = false, -- allow headers supplied by Cloudmark to be added
    scores_symbols = nil, -- optional map { [score_threshold] = symbol, ... }
  }

  local conf = lua_util.override_defaults(defaults, opts)

  -- Thresholds may arrive as string keys; normalise them to numbers
  if type(conf.scores_symbols) == 'table' then
    numerify(conf.scores_symbols)
  end

  conf.prefix = conf.prefix or ('rs_' .. conf.name .. '_')

  if not conf.log_prefix then
    if conf.name:lower() ~= conf.type:lower() then
      conf.log_prefix = conf.name .. ' (' .. conf.type .. ')'
    else
      conf.log_prefix = conf.name
    end
  end

  -- `socket` is accepted as a fallback for `servers`
  conf.servers = conf.servers or conf.socket
  if not conf.servers then
    rspamd_logger.errx(rspamd_config, 'no servers defined')

    return nil
  end

  conf.upstreams = upstream_list.create(rspamd_config,
      conf.servers,
      conf.default_port)
  if not conf.upstreams then
    rspamd_logger.errx(rspamd_config, 'cannot parse servers %s',
        conf.servers)
    return nil
  end

  conf.symbols = { { symbol = conf.symbol_spam, score = 5.0 } }
  conf.preloads = { cloudmark_preload }
  lua_util.add_debug_alias('external_services', conf.name)

  return conf
end

--- Pick the symbol whose threshold is the highest one not exceeding `score`.
-- @param scores_symbols map { [threshold] = symbol }; only number keys are
--   considered
-- @param score numeric score to classify
-- @return the matching symbol, or nil when no threshold is <= score
local function get_specific_symbol(scores_symbols, score)
  local selected
  -- Thresholds must be greater than -1 to be selectable
  local sel_thr = -1

  for threshold, sym in pairs(scores_symbols) do
    -- Skip keys that are not numbers (e.g. a mistyped configuration key):
    -- comparing a number with a string would raise an error
    if type(threshold) == 'number'
        and sel_thr < threshold and threshold <= score then
      selected = sym
      sel_thr = threshold
    end
  end

  return selected
end

-- Load-time self-checks for get_specific_symbol.
-- Each case is { thresholds, score, expected symbol (nil when none) }.
do
  local single = { [90] = 'CLOUDMARK_SPAM' }
  local double = { [90] = 'CLOUDMARK_SPAM', [80] = 'CLOUDMARK_SPAM2' }

  local cases = {
    { single, 100, 'CLOUDMARK_SPAM' },
    { single, 80, nil },
    { double, 100, 'CLOUDMARK_SPAM' },
    { double, 80, 'CLOUDMARK_SPAM2' },
    { double, 70, nil },
    { double, 90, 'CLOUDMARK_SPAM' },
    { {}, 80, nil },
    { { [100] = 'CLOUDMARK_SPAM' }, 100, 'CLOUDMARK_SPAM' },
    { { [0] = 'CLOUDMARK_SPAM' }, 0, 'CLOUDMARK_SPAM' },
  }

  for _, case in ipairs(cases) do
    assert(get_specific_symbol(case[1], case[2]) == case[3])
  end
end

--- Interpret the body of a Cloudmark scan response and apply it to the task.
-- Inserts `rule.symbol_fail` when the body cannot be parsed or has no score.
-- Otherwise, in this order: logs the analysis string (if any), inserts
-- `rule.symbol_spam` when the score reaches `rule.score_threshold`, adds
-- headers requested by the reply / the score header when enabled, and
-- inserts the per-threshold symbol from `rule.scores_symbols`.
-- @param task task the results are attached to
-- @param rule scanner rule
-- @param body raw response body
local function parse_cloudmark_reply(task, rule, body)
  local parser = ucl.parser()
  local parsed, parse_err = parser:parse_string(body)
  if not parsed then
    rspamd_logger.errx(task, '%s: bad response body (raw): %s', N, body)
    task:insert_result(rule.symbol_fail, 1.0, 'Parser error: ' .. parse_err)
    return
  end

  local reply = parser:get_object()
  lua_util.debugm(N, task, 'cloudmark reply is: %s', reply)

  if not reply.score then
    rspamd_logger.errx(task, '%s: bad response body (raw): %s', N, body)
    task:insert_result(rule.symbol_fail, 1.0, 'Parser error: no score')
    return
  end

  -- Report analysis string
  local analysis = reply.analysis
  if analysis then
    rspamd_logger.infox(task, 'qid: <%s>, cloudmark report string: %s',
        task:get_queue_id() or 'unknown', analysis)
  end

  -- A score that is not a number is treated as 0
  local score = tonumber(reply.score) or 0
  local score_str = tostring(score)

  if score >= rule.score_threshold then
    task:insert_result(rule.symbol_spam, 1.0, score_str)
  end

  if rule.add_headers and type(reply.appendHeaders) == 'table' then
    -- Turn one reply header entry into a (name, header spec) pair
    local function to_header_pair(h)
      return h.headerField, { order = 1, value = h.body }
    end

    lua_mime.modify_headers(task, {
      add = fun.tomap(fun.map(to_header_pair, reply.appendHeaders))
    })
  end

  if rule.add_score_header then
    lua_mime.modify_headers(task, {
      add = {
        ['X-CMAE-Score'] = { order = 1, value = score_str }
      }
    })
  end

  local thresholds = rule.scores_symbols
  if type(thresholds) == 'table' then
    local sym = get_specific_symbol(thresholds, score)
    if sym then
      task:insert_result(sym, 1.0, score_str)
    end
  end
end

--- Scan a task with Cloudmark.
-- Builds a multipart/form-data request from the whole message
-- (`task:get_content()`) plus SMTP envelope data, sends it to an upstream
-- and hands the response to `parse_cloudmark_reply`. On HTTP errors the
-- request is retried on another upstream up to `rule.retransmits` times.
-- @param task task being scanned
-- @param content content handed to `common.condition_check_and_continue`
--   (the request itself always carries the full message)
-- @param digest digest handed to `common.condition_check_and_continue`
-- @param rule scanner rule produced by `cloudmark_config`
-- @param maybe_part optional part, passed through to the `common` helpers
local function cloudmark_check(task, content, digest, rule, maybe_part)
  local function cloudmark_check_uncached()
    local upstream = common.get_upstream_or_fail(task, rule, maybe_part)
    if not upstream then
      return
    end
    local addr = upstream:get_addr()
    local retransmits = rule.retransmits

    local url = cloudmark_url(rule, addr)
    local message_data = task:get_content()
    -- max_message <= 0 (or unset) disables the size limit
    if rule.max_message and rule.max_message > 0 and #message_data > rule.max_message then
      task:insert_result(rule['symbol_fail'], 0.0, 'Message too large: ' .. #message_data)
      return
    end

    -- Every form field is a table with a `data` member
    local request = {
      rfc822 = {
        ['Content-Type'] = 'message/rfc822',
        data = message_data,
      }
    }

    local helo = task:get_helo()
    if helo then
      request['heloDomain'] = {
        data = helo,
      }
    end

    local mail_from = task:get_from('smtp') or {}
    if mail_from[1] and #mail_from[1].addr > 1 then
      request['mailFrom'] = {
        data = mail_from[1].addr
      }
    end

    local rcpt_to = task:get_recipients('smtp')
    if rcpt_to then
      request['rcptTo'] = {
        data = table.concat(fun.totable(fun.map(function(r)
          return r.addr
        end, rcpt_to)), ',')
      }
    end

    local fip = task:get_from_ip()
    if fip and fip:is_valid() then
      request['connIp'] = {
        data = tostring(fip)
      }
    end

    local hostname = task:get_hostname()
    if hostname then
      -- Use the same { data = ... } shape as every other field; the original
      -- assigned a bare string here.
      -- NOTE(review): lua_util.table_to_multipart_body appears to emit only
      -- entries that have a `data` member - confirm against its source
      request['fromHost'] = {
        data = hostname,
      }
    end

    local request_data = {
      task = task,
      url = url,
      body = lua_util.table_to_multipart_body(request, static_boundary),
      headers = {
        ['Content-Type'] = string.format('multipart/form-data; boundary="%s"', static_boundary)
      },
      timeout = rule.timeout,
    }

    local function cloudmark_callback(http_err, code, body, headers)

      -- Mark the current upstream as failed and retry on another one while
      -- retransmits are left
      local function cloudmark_requery()
        -- set current upstream to fail because an error occurred
        upstream:fail()

        -- retry with another upstream until retransmits exceeds
        if retransmits > 0 then

          retransmits = retransmits - 1

          lua_util.debugm(rule.name, task,
              '%s: request Error: %s - retries left: %s',
              rule.log_prefix, http_err, retransmits)

          -- Select a different upstream!
          upstream = rule.upstreams:get_upstream_round_robin()
          if not upstream then
            common.yield_result(task, rule,
                'no upstream available for retry', 0.0, 'fail', maybe_part)
            return
          end
          addr = upstream:get_addr()
          url = cloudmark_url(rule, addr)

          lua_util.debugm(rule.name, task, '%s: retry IP: %s:%s',
              rule.log_prefix, addr, addr:get_port())
          request_data.url = url

          http.request(request_data)
        else
          -- The upstream has already been marked as failed above, so it is
          -- not failed a second time here
          rspamd_logger.errx(task, '%s: failed to scan, maximum retransmits ' ..
              'exceed', rule.log_prefix)
          task:insert_result(rule['symbol_fail'], 0.0, 'failed to scan and ' ..
              'retransmits exceed')
        end
      end

      if http_err then
        cloudmark_requery()
      else
        -- Parse the response
        if upstream then
          upstream:ok()
        end
        if code ~= 200 then
          rspamd_logger.errx(task, 'invalid HTTP code: %s, body: %s, headers: %s', code, body, headers)
          task:insert_result(rule.symbol_fail, 1.0, 'Bad HTTP code: ' .. code)
          return
        end
        parse_cloudmark_reply(task, rule, body)
      end
    end

    request_data.callback = cloudmark_callback
    http.request(request_data)
  end

  -- When the common pre-check returns true nothing more is done here (it is
  -- given cloudmark_check_uncached as continuation); otherwise scan right away
  if common.condition_check_and_continue(task, content, rule, digest,
      cloudmark_check_uncached, maybe_part) then
    return
  else
    cloudmark_check_uncached()
  end
end

-- Public interface of the module
return {
  name = N,
  description = 'Cloudmark cartridge interface',
  type = { 'cloudmark', 'scanner' },
  -- entry points
  check = cloudmark_check,
  configure = cloudmark_config,
}