1package smtpserver

3import (

4 "errors"

5 "fmt"

6 "log/slog"

7 "strings"

8 "time"

10 "github.com/mjl-/bstore"

12 "github.com/mjl-/mox/dns"

13 "github.com/mjl-/mox/mlog"

14 "github.com/mjl-/mox/smtp"

15 "github.com/mjl-/mox/store"

16)

// reputationMethod names the kind of historic evidence a reputation decision
// was based on. The string values are the identifiers reported for a decision.
type reputationMethod string

const (
	// Evidence tied to the exact "message from" address: earlier messages
	// received from it, or earlier messages sent to it.
	methodMsgfromFull reputationMethod = "msgfromfull"
	methodMsgtoFull   reputationMethod = "msgtofull"

	// Evidence tied to the domain, or the organizational domain, of the
	// "message from" address: messages received from it, or sent to it.
	methodMsgfromDomain    reputationMethod = "msgfromdomain"
	methodMsgfromOrgDomain reputationMethod = "msgfromorgdomain"
	methodMsgtoDomain      reputationMethod = "msgtodomain"
	methodMsgtoOrgDomain   reputationMethod = "msgtoorgdomain"

	// Evidence tied to validated DKIM domains and SPF identities.
	methodDKIMSPF reputationMethod = "dkimspf"

	// Evidence tied to the remote IP address, with increasingly wide masks.
	methodIP1 reputationMethod = "ip1"
	methodIP2 reputationMethod = "ip2"
	methodIP3 reputationMethod = "ip3"

	// No usable evidence was found.
	methodNone reputationMethod = "none"
)

34// Reputation returns whether message m is likely junk.

35//

36// This function is called after checking for a manually configured spf mailfrom

37// allow (e.g. for mailing lists), and after checking for a dmarc reject policy.

38//

39// The decision is made based on historic messages delivered to the same

40// destination mailbox, MailboxOrigID. Because each mailbox may have a different

41// accept policy. We only use messages that have been marked as either junk or

42// non-junk. We help users by automatically marking them as non-junk when moving to

43// certain folders in the default config (e.g. the archive folder). We expect users

44// to mark junk messages as such when they read it. And to keep it in their inbox,

45// regular trash or archive if it is not.

46//

47// The basic idea is to keep accepting messages that were accepted in the past, and

48// keep rejecting those that were rejected. This is relatively easy to check if

49// mail passes SPF and/or DKIM with Message-From alignment. Regular email from

50// known people will be let in. But spammers are trickier. They will use new IPs,

51// (sub)domains, no or newly created SPF and/or DKIM identifiers, new localparts,

52// etc. This function likely ends up returning "inconclusive" for such emails. The

53// junkfilter will have to take care of a final decision.

54//

55// In case of doubt, it doesn't hurt much to accept another mail that a user has

56// communicated successfully with in the past. If the most recent message is marked

57// as junk that could have happened accidentally. If another message is let in, and

58// it is again junk, future messages will be rejected.

59//

60// Actual spammers will probably try to use identifiers, i.e. (sub)domain, dkim/spf

61// identifiers and ip addresses for which we have no history. We may only have

62// ip-based reputation, perhaps only an ip range, perhaps nothing.

63//

64// Some profiles of first-time senders:

65//

66// - Individuals. They can typically get past the junkfilter if needed.

67// - Transactional emails. They should get past the junkfilter. If they use one of

68// the larger email service providers, their reputation could help. If the

69// junkfilter rejects the message, users can recover the message from the Rejects

70// mailbox. The first message is typically initiated by a user, e.g. by registering.

71// - Desired commercial email will have to get past the junkfilter based on its

72// content. There will typically be earlier communication with the (organizational)

73// domain that would let the message through.

74// - Mailing list. May get past the junkfilter. If delivery is to a separate

75// mailbox, the junkfilter will let it in because of little history. Long enough to

76// build reputation based on DKIM/SPF signals. Users are best off to

77// configure accept rules for messages from mailing lists.

78//

79// The decision-making process looks at historic messages. The following properties

80// are checked until matching messages are found. If they are found, a decision is

81// returned, which may be inconclusive. The next property on the list is only

82// checked if a step did not match any messages.

83//

84// - Messages matching full "message from" address, either with strict/relaxed

85// dkim/spf-verification, or without.

86// - Messages the user sent to the "message from" address.

87// - Messages matching only the domain of the "message from" address (different

88// localpart), again with verification or without.

89// - Messages sent to an address in the domain of the "message from" address.

90// - The previous two checks again, but now checking against the organizational

91// domain instead of the exact domain.

92// - Matching DKIM domains and a matching SPF mailfrom, or mailfrom domain, or ehlo

93// domain.

94// - "Exact" IP, or nearby IPs.

95//

96// References:

97// ../rfc/5863

98// ../rfc/7960

99// ../rfc/6376:1915

100// ../rfc/6376:3716

101// ../rfc/7208:2167

102func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message, smtputf8 bool) (rjunk *bool, rconclusive bool, rmethod reputationMethod, reasonText string, rerr error) {

103 boolptr := func(v bool) *bool {

104 return &v

105 }

106 xfalse := boolptr(false)

107 xtrue := boolptr(true)

108

109 type queryError string

110

111 defer func() {

112 x := recover()

113 if x == nil {

114 return

115 }

116 if xerr, ok := x.(queryError); ok {

117 rerr = errors.New(string(xerr))

118 return

119 }

120 panic(x)

121 }()

122

123 now := time.Now()

124

125 // messageQuery returns a base query for historic seen messages to the same

126 // mailbox, at most maxAge old, and at most maxCount messages.

127 messageQuery := func(fm *store.Message, maxAge time.Duration, maxCount int) *bstore.Query[store.Message] {

128 q := bstore.QueryTx[store.Message](tx)

129 q.FilterEqual("MailboxOrigID", m.MailboxID)

130 q.FilterEqual("Expunged", false)

131 q.FilterFn(func(m store.Message) bool {

132 return m.Junk || m.Notjunk

133 })

134 if fm != nil {

135 q.FilterNonzero(*fm)

136 }

137 q.FilterGreaterEqual("Received", now.Add(-maxAge))

138 q.Limit(maxCount)

139 q.SortDesc("Received")

140 return q

141 }

142

143 // Execute the query, returning messages or returning error through panic.

144 xmessageList := func(q *bstore.Query[store.Message], descr string) []store.Message {

145 t0 := time.Now()

146 l, err := q.List()

147 log.Debugx("querying messages for reputation", err,

148 slog.Int("msgs", len(l)),

149 slog.String("descr", descr),

150 slog.Duration("queryduration", time.Since(t0)))

151 if err != nil {

152 panic(queryError(fmt.Sprintf("listing messages: %v", err)))

153 }

154 return l

155 }

156

157 xrecipientExists := func(q *bstore.Query[store.Recipient]) bool {

158 exists, err := q.Exists()

159 if err != nil {

160 panic(queryError(fmt.Sprintf("checking for recipient: %v", err)))

161 }

162 return exists

163 }

164

165 const year = 365 * 24 * time.Hour

166

167 // Look for historic messages with same "message from" address. We'll

168 // treat any validation (strict/dmarc/relaxed) the same, but "none"

169 // separately.

170 //

171 // We only need 1 message, and sometimes look at a second message. If

172 // the last message or the message before was an accept, we accept. If

173 // the single last or last two were a reject, we reject.

174 //

175 // If there was no validation, any signal is inconclusive.

176 if m.MsgFromDomain != "" {

177 q := messageQuery(&store.Message{MsgFromLocalpart: m.MsgFromLocalpart, MsgFromDomain: m.MsgFromDomain}, 3*year, 2)

178 q.FilterEqual("MsgFromValidated", m.MsgFromValidated)

179 msgs := xmessageList(q, "mgsfromfull")

180 if len(msgs) > 0 {

181 // todo: we may want to look at dkim/spf in this case.

182 spam := msgs[0].Junk && (len(msgs) == 1 || msgs[1].Junk)

183 conclusive := m.MsgFromValidated

184 return &spam, conclusive, methodMsgfromFull, "reputation of exact message-from address", nil

185 }

186 if !m.MsgFromValidated {

187 // Look for historic messages that were validated. If present, this is likely spam.

188 // Only return as conclusively spam if history also says this From-address sent

189 // spam.

190 q := messageQuery(&store.Message{MsgFromLocalpart: m.MsgFromLocalpart, MsgFromDomain: m.MsgFromDomain, MsgFromValidated: true}, 3*year, 2)

191 msgs = xmessageList(q, "msgfromfull-validated")

192 if len(msgs) > 0 {

193 spam := msgs[0].Junk && (len(msgs) == 1 || msgs[1].Junk)

194 return xtrue, spam, methodMsgfromFull, "unvalidated message with validated historic messages with exact message-from address", nil

195 }

196 }

197

198 // Look if we ever sent to this address. If so, we accept,

199 qr := bstore.QueryTx[store.Recipient](tx)

200 qr.FilterEqual("Localpart", m.MsgFromLocalpart)

201 qr.FilterEqual("Domain", m.MsgFromDomain)

202 qr.FilterGreaterEqual("Sent", now.Add(-3*year))

203 if xrecipientExists(qr) {

204 return xfalse, true, methodMsgtoFull, "exact message-from address was earlier message recipient", nil

205 }

206

207 // Look for domain match, then for organizational domain match.

208 for _, orgdomain := range []bool{false, true} {

209 qm := store.Message{}

210 var method reputationMethod

211 var source, descr string

212 if orgdomain {

213 qm.MsgFromOrgDomain = m.MsgFromOrgDomain

214 method = methodMsgfromOrgDomain

215 source = "organizational domain of message-from address"

216 descr = "msgfromorgdomain"

217 } else {

218 qm.MsgFromDomain = m.MsgFromDomain

219 method = methodMsgfromDomain

220 source = "exact domain of message-from address"

221 descr = "msgfromdomain"

222 }

223

224 q := messageQuery(&qm, 2*year, 20)

225 q.FilterEqual("MsgFromValidated", m.MsgFromValidated)

226 msgs := xmessageList(q, descr)

227 if len(msgs) > 0 {

228 nonjunk := 0

229 for _, m := range msgs {

230 if !m.Junk {

231 nonjunk++

232 }

233 }

234 if 100*nonjunk/len(msgs) > 80 {

235 reasonText = fmt.Sprintf("positive reputation with %s based on %d messages", source, len(msgs))

236 return xfalse, true, method, reasonText, nil

237 }

238 if nonjunk == 0 {

239 // Only conclusive with at least 3 different localparts.

240 localparts := map[smtp.Localpart]struct{}{}

241 for _, m := range msgs {

242 localparts[m.MsgFromLocalpart] = struct{}{}

243 if len(localparts) == 3 {

244 reasonText = fmt.Sprintf("negative reputation of at least 3 addresses with %s based on %d messages", source, len(msgs))

245 return xtrue, true, method, reasonText, nil

246 }

247 }

248 reasonText = fmt.Sprintf("negative reputation with %s based on %d messages", source, len(msgs))

249 return xtrue, false, method, reasonText, nil

250 }

251 // Mixed signals from domain. We don't want to block a new sender.

252 reasonText = fmt.Sprintf("mixed signals with %s based on %d messages", source, len(msgs))

253 return nil, false, method, reasonText, nil

254 }

255 if !m.MsgFromValidated {

256 // Look for historic messages that were validated. If present, this is likely spam.

257 // Only return as conclusively spam if history also says this From-address sent

258 // spam.

259 q := messageQuery(&qm, 2*year, 2)

260 q.FilterEqual("MsgFromValidated", true)

261 msgs = xmessageList(q, descr+"-validated")

262 if len(msgs) > 0 {

263 spam := msgs[0].Junk && (len(msgs) == 1 || msgs[1].Junk)

264 reasonText = fmt.Sprintf("unvalidated message with %s while we have validated messages from that source", source)

265 return xtrue, spam, method, reasonText, nil

266 }

267 }

268

269 // Look if we ever sent to this address. If so, we accept,

270 qr := bstore.QueryTx[store.Recipient](tx)

271 if orgdomain {

272 qr.FilterEqual("OrgDomain", m.MsgFromOrgDomain)

273 method = methodMsgtoOrgDomain

274 source = "organizational domain of message-from address"

275 } else {

276 qr.FilterEqual("Domain", m.MsgFromDomain)

277 method = methodMsgtoDomain

278 source = "exact domain of message-from address"

279 }

280 qr.FilterGreaterEqual("Sent", now.Add(-2*year))

281 if xrecipientExists(qr) {

282 reasonText = fmt.Sprintf("%s was recipient address", source)

283 return xfalse, true, method, reasonText, nil

284 }

285 }

286 }

287

288 // DKIM and SPF.

289 // We only use identities that passed validation. Failed identities are ignored. ../rfc/6376:2447

290 // todo future: we could do something with the DKIM identity (i=) field if it is more specific than just the domain (d=).

291 dkimspfsignals := []float64{}

292 dkimspfreasondoms := []string{}

293 dkimspfmsgs := 0

294 for _, dom := range m.DKIMDomains {

295 q := messageQuery(nil, year/2, 50)

296 q.FilterIn("DKIMDomains", dom)

297 msgs := xmessageList(q, "dkimdomain")

298 if len(msgs) > 0 {

299 nspam := 0

300 for _, m := range msgs {

301 if m.Junk {

302 nspam++

303 }

304 }

305 pspam := float64(nspam) / float64(len(msgs))

306 dkimspfsignals = append(dkimspfsignals, pspam)

307 dkimspfreasondoms = append(dkimspfreasondoms, dom)

308 dkimspfmsgs = len(msgs)

309 }

310 }

311 if m.MailFromValidated || m.EHLOValidated {

312 var dom string

313 var msgs []store.Message

314 if m.MailFromValidated && m.MailFromDomain != "" {

315 dom = m.MailFromDomain

316 q := messageQuery(&store.Message{MailFromLocalpart: m.MailFromLocalpart, MailFromDomain: m.MailFromDomain}, year/2, 50)

317 msgs = xmessageList(q, "mailfrom")

318 if len(msgs) == 0 {

319 q := messageQuery(&store.Message{MailFromDomain: m.MailFromDomain}, year/2, 50)

320 msgs = xmessageList(q, "mailfromdomain")

321 }

322 }

323 if len(msgs) == 0 && m.EHLOValidated && m.EHLODomain != "" {

324 dom = m.EHLODomain

325 q := messageQuery(&store.Message{EHLODomain: m.EHLODomain}, year/2, 50)

326 msgs = xmessageList(q, "ehlodomain")

327 }

328 if len(msgs) > 0 {

329 nspam := 0

330 for _, m := range msgs {

331 if m.Junk {

332 nspam++

333 }

334 }

335 pspam := float64(nspam) / float64(len(msgs))

336 dkimspfsignals = append(dkimspfsignals, pspam)

337 dkimspfreasondoms = append(dkimspfreasondoms, dom)

338 if len(msgs) > dkimspfmsgs {

339 dkimspfmsgs = len(msgs)

340 }

341 }

342 }

343 if len(dkimspfsignals) > 0 {

344 var nham, nspam int

345 var hamdoms, spamdoms []string

346 for i, p := range dkimspfsignals {

347 d, _ := dns.ParseDomain(dkimspfreasondoms[i])

348 if p < .1 {

349 nham++

350 hamdoms = append(hamdoms, d.XName(smtputf8))

351 } else if p > .9 {

352 nspam++

353 spamdoms = append(spamdoms, d.XName(smtputf8))

354 }

355 }

356 if nham > 0 && nspam == 0 {

357 reasonText = fmt.Sprintf("positive dkim/spf reputation for domain(s) %s", strings.Join(hamdoms, ","))

358 return xfalse, true, methodDKIMSPF, reasonText, nil

359 }

360 if nspam > 0 && nham == 0 {

361 reasonText = fmt.Sprintf("negative dkim/spf reputation for domain(s) %s", strings.Join(hamdoms, ","))

362 return xtrue, dkimspfmsgs > 1, methodDKIMSPF, reasonText, nil

363 }

364 reasonText = fmt.Sprintf("mixed dkim/spf reputation, positive for %s, negative for %s", strings.Join(hamdoms, ","), strings.Join(spamdoms, ","))

365 return nil, false, methodDKIMSPF, reasonText, nil

366 }

367

368 // IP-based. A wider mask needs more messages to be conclusive.

369 // We require the resulting signal to be strong, i.e. likely ham or likely spam.

370 var msgs []store.Message

371 var need int

372 var method reputationMethod

373 var ip string

374 if m.RemoteIPMasked1 != "" {

375 q := messageQuery(&store.Message{RemoteIPMasked1: m.RemoteIPMasked1}, year/4, 50)

376 msgs = xmessageList(q, "ip1")

377 need = 2

378 method = methodIP1

379 ip = m.RemoteIPMasked1

380 }

381 if len(msgs) == 0 && m.RemoteIPMasked2 != "" {

382 q := messageQuery(&store.Message{RemoteIPMasked2: m.RemoteIPMasked2}, year/4, 50)

383 msgs = xmessageList(q, "ip2")

384 need = 5

385 method = methodIP2

386 ip = m.RemoteIPMasked2

387 }

388 if len(msgs) == 0 && m.RemoteIPMasked3 != "" {

389 q := messageQuery(&store.Message{RemoteIPMasked3: m.RemoteIPMasked3}, year/4, 50)

390 msgs = xmessageList(q, "ip3")

391 need = 10

392 method = methodIP3

393 ip = m.RemoteIPMasked3

394 }

395 if len(msgs) > 0 {

396 nspam := 0

397 for _, m := range msgs {

398 if m.Junk {

399 nspam++

400 }

401 }

402 pspam := float64(nspam) / float64(len(msgs))

403 var spam *bool

404 if pspam < .25 {

405 spam = xfalse

406 } else if pspam > .75 {

407 spam = xtrue

408 }

409 conclusive := len(msgs) >= need && (pspam <= 0.1 || pspam >= 0.9)

410 v6 := strings.Contains(m.RemoteIP, ":")

411 reasonText = fmt.Sprintf("reputation for ip %s%s, spam score %.2f", ip, maskclasses[classmask{v6, method}], pspam)

412 return spam, conclusive, method, reasonText, nil

413 }

414

415 return nil, false, methodNone, "no address/spf/dkim/ip reputation", nil

416}

417

418type classmask struct {

419 v6 bool

420 method reputationMethod

421}

422

423var maskclasses = map[classmask]string{

424 {false, methodIP1}: "/32",

425 {false, methodIP2}: "/26",

426 {false, methodIP3}: "/21",

427 {true, methodIP1}: "/64",

428 {true, methodIP2}: "/48",

429 {true, methodIP3}: "/32",

430}

431