-- NOTE(review): the lines below appear to be file-browser output captured
-- together with the module; they are not Lua and are kept only as comments.
-- HEX
-- Server: Apache/2
-- System: Linux nexus-01 4.18.0-553.120.1.el8_10.x86_64 #1 SMP Mon Apr 20 18:04:27 EDT 2026 x86_64
-- User: aglcoke (1118)
-- PHP: 8.2.31
-- Disabled: mail,exec,system,passthru,shell_exec,proc_close,proc_open,dl,popen,show_source,posix_kill,posix_mkfifo,posix_getpwuid,posix_setpgid,posix_setsid,posix_setuid,posix_setgid,posix_seteuid,posix_setegid,posix_uname
-- Upload Files
-- File: //usr/share/rspamd/lualib/lua_feedback_parsers.lua
--[[
Copyright (c) 2026, Vsevolod Stakhov <vsevolod@rspamd.com>

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
]] --

--[[[
-- @module lua_feedback_parsers
-- This module provides parsers for inbound feedback reports that arrive as
-- a regular message (MIME) on a task:
--
--   * RFC 3464 Delivery Status Notifications (DSN / bounces)
--   * RFC 5965 Abuse Reporting Format (ARF / FBL)
--
-- The parsers operate on a `task` object and return a structured Lua table
-- describing the report, or `nil` if the message is not a report of that
-- kind. Both parsers are defensive: malformed bodies will not raise an
-- error, they will produce a partial result at best (documented per
-- function) or `nil`.
--]]

local rspamd_logger = require 'rspamd_logger'
local lua_util = require 'lua_util'

-- Module tag passed as the first argument to rspamd_logger.debugm() by the
-- parsers in this file.
local N = 'lua_feedback_parsers'
-- Short local aliases for the two lua_util string helpers used by the field
-- parsers in this file.
local str_trim = lua_util.str_trim
local str_split = lua_util.rspamd_str_split

-- Table of functions exported by this module; it is the module's return value.
local exports = {}

-- Remove one enclosing `<...>` pair (surrounding whitespace is tolerated),
-- e.g. `<id@example>` becomes `id@example`. Input that is not wrapped in
-- angle brackets is returned trimmed; a nil/false input yields nil.
local function strip_angles(s)
  if s then
    local bracketed = s:match('^%s*<(.-)>%s*$')
    if bracketed == nil then
      -- Not of the form `<...>`: just trim it.
      return str_trim(s)
    end
    return bracketed
  end
  return nil
end

--[[
-- Read one RFC 822 style field block out of `lines`, beginning at the
-- 1-based index `start_line` (defaults to 1). The block ends at the first
-- empty line or at the end of the array.
--
-- Folding is supported: a line whose first character is a space or a tab is
-- appended (trimmed, joined with a single space) to the field that precedes
-- it. A continuation with no preceding field, and any line that has no
-- `name:` prefix, is skipped.
--
-- Returns three values:
--   1. map of lowercased field name -> trimmed value (for a repeated field
--      the last occurrence wins)
--   2. map of lowercased field name -> array of every value of that field in
--      order of appearance (useful for e.g. `Reported-URI`)
--   3. 1-based index of the line following the terminating empty line, or
--      #lines + 1 when the block ran to the end of input
]]
local function parse_field_block(lines, start_line)
  local single, multi = {}, {}
  local first_idx = start_line or 1
  local last_idx = #lines
  -- Field currently being accumulated (name plus its value fragments).
  local pending_name, pending_parts

  -- Store the pending field (if any) into both maps and reset the state.
  local function commit()
    if pending_name then
      local joined = str_trim(table.concat(pending_parts, ' '))
      single[pending_name] = joined
      local seen = multi[pending_name]
      if not seen then
        seen = {}
        multi[pending_name] = seen
      end
      seen[#seen + 1] = joined
    end
    pending_name, pending_parts = nil, nil
  end

  for idx = first_idx, last_idx do
    local text = lines[idx]
    if text == '' then
      -- Blank line terminates the block.
      commit()
      return single, multi, idx + 1
    end

    local lead = text:sub(1, 1)
    if lead == ' ' or lead == '\t' then
      -- Folded continuation; dropped when there is nothing to continue.
      if pending_name then
        pending_parts[#pending_parts + 1] = str_trim(text)
      end
    else
      local raw_name, raw_value = text:match('^([^:]+):%s?(.*)$')
      -- Lines without a `name:` prefix are malformed and skipped.
      if raw_name then
        commit()
        pending_name = raw_name:lower():gsub('%s+$', '')
        pending_parts = { raw_value or '' }
      end
    end
  end

  commit()
  -- Input exhausted: report the position just past the last line visited.
  local resume_at = last_idx + 1
  if first_idx > resume_at then
    resume_at = first_idx
  end
  return single, multi, resume_at
end

-- Break a whole body into its field blocks (blocks are separated by one or
-- more empty lines). Returns an array of `{ fields = ..., fields_multi = ... }`
-- tables, one per block that yielded at least one field. This is the shape of
-- a message/delivery-status body, which consists of 1..N blocks.
local function parse_field_blocks(body)
  local text = body
  if type(text) ~= 'string' then
    text = tostring(text or '')
  end

  -- Drop every CR so that splitting on LF handles CRLF input as well.
  local unix_text = text:gsub('\r', '')
  local lines = str_split(unix_text, '\n')
  if not lines then
    return {}
  end

  local total = #lines
  -- Advance `pos` past any run of empty lines.
  local function skip_blank(pos)
    while pos <= total and lines[pos] == '' do
      pos = pos + 1
    end
    return pos
  end

  local collected = {}
  local pos = skip_blank(1)
  while pos <= total do
    local single, multi, resume = parse_field_block(lines, pos)
    if next(single) ~= nil then
      collected[#collected + 1] = {
        fields = single,
        fields_multi = multi,
      }
    end
    -- Guard against a parser that made no progress.
    if resume <= pos then
      break
    end
    pos = skip_blank(resume)
  end
  return collected
end

-- Return the first part (in `task:get_parts()` order) that is a
-- multipart/report whose `report-type` Content-Type parameter equals
-- `wanted_report_type`, or nil when there is none.
--
-- Type, subtype and report-type are all compared case-insensitively, in
-- line with the other part finders in this file. A part whose parameter
-- table is missing, or whose `report-type` is not a string, is skipped
-- rather than raising an error.
local function find_multipart_report(task, wanted_report_type)
  local wanted = wanted_report_type
  if type(wanted) == 'string' then
    wanted = wanted:lower()
  end

  for _, part in ipairs(task:get_parts() or {}) do
    local t, st, params = part:get_type_full()
    if t and st and type(params) == 'table'
        and t:lower() == 'multipart' and st:lower() == 'report' then
      local rt = params['report-type']
      -- Only a string can be compared; anything else is treated as no match.
      if type(rt) == 'string' and rt:lower() == wanted then
        return part
      end
    end
  end
  return nil
end

-- Return the first part (in `task:get_parts()` order) whose Content-Type is
-- `wanted_type/wanted_subtype`, or nil when there is none. The part's type
-- and subtype are lowercased before comparison, so the `wanted_*` arguments
-- are expected in lowercase. When `wanted_subtype` is nil, any subtype of
-- `wanted_type` matches.
local function find_part_by_type(task, wanted_type, wanted_subtype)
  local candidates = task:get_parts() or {}
  for _, candidate in ipairs(candidates) do
    local major, minor = candidate:get_type()
    local major_ok = major and major:lower() == wanted_type
    if major_ok then
      if not wanted_subtype then
        -- Caller does not care about the subtype.
        return candidate
      end
      if minor and minor:lower() == wanted_subtype then
        return candidate
      end
    end
  end
  return nil
end

-- Lowercased Content-Type (type -> subtype) of the parts that can carry the
-- original message, mapped to the kind reported for them: 'full' means
-- headers plus body, 'headers' means headers only.
local ORIGINAL_PART_KINDS = {
  message = { rfc822 = 'full', global = 'full' },
  text = { ['rfc822-headers'] = 'headers' },
}

-- Find the part of a report that embeds the original message.
-- Returns `part, kind` for the first part (in `task:get_parts()` order) that
-- is message/rfc822 or message/global (kind 'full') or text/rfc822-headers
-- (kind 'headers'); returns nil when no such part exists.
local function find_original_message_part(task)
  for _, candidate in ipairs(task:get_parts() or {}) do
    local major, minor = candidate:get_type()
    if major and minor then
      local major_lc, minor_lc = major:lower(), minor:lower()
      local by_subtype = ORIGINAL_PART_KINDS[major_lc]
      local kind = by_subtype and by_subtype[minor_lc]
      if kind then
        return candidate, kind
      end
    end
  end
  return nil
end

-- Pull the handful of original-message headers this module reports out of
-- the content of a message/rfc822 (or text/rfc822-headers) sub-part.
-- Only the leading header block of the content is read. Returns a table with
-- `message_id`, `from`, `to` (each with one enclosing `<...>` pair removed),
-- `subject` and `date`; returns nil when the part has no content, no
-- parseable header block, or none of those five headers.
local function extract_original_message(part)
  local raw = part:get_content()
  local text = raw and tostring(raw)
  if not text or text == '' then
    return nil
  end

  -- Drop CRs so CRLF content splits cleanly on LF.
  local unix_text = text:gsub('\r', '')
  local lines = str_split(unix_text, '\n')
  if not lines then
    return nil
  end

  local hdr = parse_field_block(lines, 1)
  if not hdr or next(hdr) == nil then
    return nil
  end

  local message_id = strip_angles(hdr['message-id'])
  local from = strip_angles(hdr['from'])
  local to = strip_angles(hdr['to'])
  local subject, date = hdr['subject'], hdr['date']

  if message_id or from or to or subject or date then
    return {
      message_id = message_id,
      from = from,
      to = to,
      subject = subject,
      date = date,
    }
  end
  -- Headers were present, but none of the ones we report.
  return nil
end

-- ----------------------------------------------------------------------------
-- DSN (RFC 3464)
-- ----------------------------------------------------------------------------

-- Build one `recipients` entry from the field map of a per-recipient block.
-- `action` is lowercased; every other value is passed through unchanged.
local function dsn_recipient_from_fields(rf)
  local action = rf['action']
  return {
    original_recipient = rf['original-recipient'],
    final_recipient = rf['final-recipient'],
    action = action and action:lower() or nil,
    status = rf['status'],
    diagnostic_code = rf['diagnostic-code'],
    remote_mta = rf['remote-mta'],
    last_attempt_date = rf['last-attempt-date'],
  }
end

--[[[
-- @function lua_feedback_parsers.parse_dsn(task)
-- Parse an RFC 3464 Delivery Status Notification from the given task.
--
-- Detection: the task must contain either a `multipart/report` part with
-- `report-type=delivery-status`, or a `message/delivery-status` sub-part.
-- If neither is present, returns `nil`.
--
-- Result shape (scalars are the trimmed field values as strings, or nil
-- when the field is absent):
--   reporting_mta, original_envelope_id, arrival_date, received_from_mta
--     - per-message fields, read from the first field block
--   recipients - array with one table per per-recipient block, holding
--     original_recipient, final_recipient, action (lowercased), status,
--     diagnostic_code, remote_mta, last_attempt_date
--   original_message - table with message_id, from, to, subject, date of
--     the embedded original message, or nil if none could be extracted
--
-- Block classification: the first block supplies the per-message fields and
-- the remaining blocks are recipients. If the first block itself carries a
-- `Final-Recipient` field (the per-message block is missing, or is not
-- separated from the first recipient by a blank line), that block is
-- reported as a recipient as well, so its delivery status is not lost.
--
-- Malformed-body policy: if detection succeeds but the body cannot be
-- parsed into at least one non-empty field block, the function still
-- returns a table (with `recipients = {}`) so that callers can distinguish
-- "not a DSN" (nil) from "a DSN we couldn't fully parse" (table with
-- mostly-nil fields).
--
-- @param {rspamd_task} task message to inspect
-- @return {table|nil} parsed DSN (shape above), or nil if not a DSN
--]]
function exports.parse_dsn(task)
  if not task then
    return nil
  end

  -- Detection: prefer the envelope multipart/report, but also accept a
  -- bare message/delivery-status (some MTAs emit non-standard shapes).
  local envelope = find_multipart_report(task, 'delivery-status')
  local status_part = find_part_by_type(task, 'message', 'delivery-status')
  if not envelope and not status_part then
    return nil
  end

  -- The nil entries are no-ops; they are listed to show the result shape.
  local result = {
    reporting_mta = nil,
    original_envelope_id = nil,
    arrival_date = nil,
    received_from_mta = nil,
    recipients = {},
    original_message = nil,
  }

  if status_part then
    local body = status_part:get_content()
    if body then
      local blocks = parse_field_blocks(tostring(body))
      if #blocks > 0 then
        local per_message = blocks[1].fields
        result.reporting_mta = per_message['reporting-mta']
        result.original_envelope_id = per_message['original-envelope-id']
        result.arrival_date = per_message['arrival-date']
        result.received_from_mta = per_message['received-from-mta']

        -- Final-Recipient only occurs in per-recipient groups, so a first
        -- block that has it must be counted as a recipient too.
        local first_recipient_block = per_message['final-recipient'] and 1 or 2
        for j = first_recipient_block, #blocks do
          result.recipients[#result.recipients + 1] =
              dsn_recipient_from_fields(blocks[j].fields)
        end
      else
        rspamd_logger.debugm(N, task, 'DSN detected but delivery-status body has no parseable blocks')
      end
    else
      rspamd_logger.debugm(N, task, 'DSN detected but delivery-status part content is empty')
    end
  end

  local orig_part = find_original_message_part(task)
  if orig_part then
    result.original_message = extract_original_message(orig_part)
  end

  return result
end

-- ----------------------------------------------------------------------------
-- ARF (RFC 5965)
-- ----------------------------------------------------------------------------

--[[[
-- @function lua_feedback_parsers.parse_arf(task)
-- Parse an RFC 5965 Abuse Reporting Format (ARF) feedback report.
--
-- Detection: the task must contain a `multipart/report` part with
-- `report-type=feedback-report` AND a sub-part with
-- `message/feedback-report`. If either is missing, returns `nil`.
--
-- Result shape (scalars are the trimmed field values as strings, or nil
-- when the field is absent; for a repeated field the last occurrence is
-- reported). Only the first field block of the report body is read:
--   feedback_type (lowercased), version, user_agent, arrival_date,
--   source_ip, reported_domain, authentication_results,
--   original_envelope_id
--   original_mail_from, original_rcpt_to - one enclosing `<...>` pair removed
--   incidents - number; set only when the field consists solely of decimal
--     digits, otherwise left nil
--   reported_uri - array of every `Reported-URI` value in order of
--     appearance (empty when there is none)
--   original_message - table with message_id and from of the embedded
--     original message, or nil if none could be extracted
--
-- Malformed-body policy: same as `parse_dsn`. If detection succeeds but
-- the feedback-report body is unparseable, a table is still returned (with
-- mostly-nil fields and `reported_uri = {}`).
--
-- @param {rspamd_task} task message to inspect
-- @return {table|nil} parsed ARF (shape above), or nil if not an ARF report
--]]
function exports.parse_arf(task)
  if not task then
    return nil
  end

  local envelope = find_multipart_report(task, 'feedback-report')
  if not envelope then
    return nil
  end
  local fb_part = find_part_by_type(task, 'message', 'feedback-report')
  if not fb_part then
    return nil
  end

  -- The nil entries are no-ops; they are listed to show the result shape.
  local result = {
    feedback_type = nil,
    version = nil,
    user_agent = nil,
    original_mail_from = nil,
    original_rcpt_to = nil,
    arrival_date = nil,
    source_ip = nil,
    reported_domain = nil,
    reported_uri = {},
    authentication_results = nil,
    original_envelope_id = nil,
    incidents = nil,
    original_message = nil,
  }

  local body = fb_part:get_content()
  if body then
    local blocks = parse_field_blocks(tostring(body))
    if #blocks > 0 then
      local f = blocks[1].fields
      local fm = blocks[1].fields_multi
      result.feedback_type = f['feedback-type'] and f['feedback-type']:lower() or nil
      result.version = f['version']
      result.user_agent = f['user-agent']
      result.original_mail_from = strip_angles(f['original-mail-from'])
      result.original_rcpt_to = strip_angles(f['original-rcpt-to'])
      result.arrival_date = f['arrival-date']
      result.source_ip = f['source-ip']
      result.reported_domain = f['reported-domain']
      result.authentication_results = f['authentication-results']
      result.original_envelope_id = f['original-envelope-id']

      -- Incidents is a count written as decimal digits. A bare tonumber()
      -- would also accept hex, fractional, exponent and signed forms, so
      -- the text is validated before conversion.
      local incidents = f['incidents']
      if incidents and incidents:match('^%d+$') then
        result.incidents = tonumber(incidents)
      end

      -- Copy rather than alias, so the result does not share the parser's
      -- internal array.
      if fm and fm['reported-uri'] then
        for _, v in ipairs(fm['reported-uri']) do
          result.reported_uri[#result.reported_uri + 1] = v
        end
      end
    else
      rspamd_logger.debugm(N, task, 'ARF detected but feedback-report body has no parseable blocks')
    end
  else
    rspamd_logger.debugm(N, task, 'ARF detected but feedback-report part content is empty')
  end

  local orig_part = find_original_message_part(task)
  if orig_part then
    local om = extract_original_message(orig_part)
    if om then
      -- RFC 5965 consumers typically only care about Message-ID and From.
      result.original_message = {
        message_id = om.message_id,
        from = om.from,
      }
    end
  end

  return result
end

-- Underscore-prefixed aliases of two file-local helpers, exposed for unit
-- tests.
exports._parse_field_blocks = parse_field_blocks
exports._strip_angles = strip_angles

-- The exports table is the value of the module.
return exports