1package smtpserver

3import (

4 "errors"

5 "fmt"

6 "log/slog"

7 "time"

9 "github.com/mjl-/bstore"

11 "github.com/mjl-/mox/mlog"

12 "github.com/mjl-/mox/smtp"

13 "github.com/mjl-/mox/store"

14)

// reputationMethod names the kind of historic evidence on which reputation
// based its result.
type reputationMethod string

// Methods based on the "message from" address: either historic messages
// received from it, or messages previously sent to it.
const (
	methodMsgfromFull      = reputationMethod("msgfromfull")      // Received from the full address.
	methodMsgtoFull        = reputationMethod("msgtofull")        // Sent to the full address.
	methodMsgfromDomain    = reputationMethod("msgfromdomain")    // Received from the exact domain.
	methodMsgfromOrgDomain = reputationMethod("msgfromorgdomain") // Received from the organizational domain.
	methodMsgtoDomain      = reputationMethod("msgtodomain")      // Sent to the exact domain.
	methodMsgtoOrgDomain   = reputationMethod("msgtoorgdomain")   // Sent to the organizational domain.
)

// Methods based on DKIM/SPF identities and on the (masked) remote IP.
const (
	methodDKIMSPF = reputationMethod("dkimspf") // DKIM domains, SPF mailfrom or EHLO domain.
	methodIP1     = reputationMethod("ip1")     // RemoteIPMasked1.
	methodIP2     = reputationMethod("ip2")     // RemoteIPMasked2.
	methodIP3     = reputationMethod("ip3")     // RemoteIPMasked3.
)

// methodNone is returned when no historic messages matched at all.
const methodNone = reputationMethod("none")

32// Reputation returns whether message m is likely junk.

33//

34// This function is called after checking for a manually configured spf mailfrom

35// allow (e.g. for mailing lists), and after checking for a dmarc reject policy.

36//

37// The decision is made based on historic messages delivered to the same

38// destination mailbox, MailboxOrigID. Because each mailbox may have a different

39// accept policy. We only use messages that have been marked as either junk or

40// non-junk. We help users by automatically marking them as non-junk when moving to

41// certain folders in the default config (e.g. the archive folder). We expect users

42// to mark junk messages as such when they read it. And to keep it in their inbox,

43// regular trash or archive if it is not.

44//

45// The basic idea is to keep accepting messages that were accepted in the past, and

46// keep rejecting those that were rejected. This is relatively easy to check if

47// mail passes SPF and/or DKIM with Message-From alignment. Regular email from

48// known people will be let in. But spammers are trickier. They will use new IPs,

49// (sub)domains, no or newly created SPF and/or DKIM identifiers, new localparts,

50// etc. This function likely ends up returning "inconclusive" for such emails. The

51// junkfilter will have to take care of a final decision.

52//

53// In case of doubt, it doesn't hurt much to accept another mail that a user has

54// communicated successfully with in the past. If the most recent message is marked

55// as junk that could have happened accidentally. If another message is let in, and

56// it is again junk, future messages will be rejected.

57//

58// Actual spammers will probably try to use identifiers, i.e. (sub)domain, dkim/spf

59// identifiers and ip addresses for which we have no history. We may only have

60// ip-based reputation, perhaps only an ip range, perhaps nothing.

61//

62// Some profiles of first-time senders:

63//

64// - Individuals. They can typically get past the junkfilter if needed.

65// - Transactional emails. They should get past the junkfilter. If they use one of

66// the larger email service providers, their reputation could help. If the

67// junkfilter rejects the message, users can recover the message from the Rejects

68// mailbox. The first message is typically initiated by a user, e.g. by registering.

69// - Desired commercial email will have to get past the junkfilter based on its

70// content. There will typically be earlier communication with the (organizational)

71// domain that would let the message through.

72// - Mailing list. May get past the junkfilter. If delivery is to a separate

73// mailbox, the junkfilter will let it in because of little history. Long enough to

74// build reputation based on DKIM/SPF signals. Users are best off to

75// configure accept rules for messages from mailing lists.

76//

77// The decision-making process looks at historic messages. The following properties

78// are checked until matching messages are found. If they are found, a decision is

79// returned, which may be inconclusive. The next property on the list is only

80// checked if a step did not match any messages.

81//

82// - Messages matching full "message from" address, either with strict/relaxed

83// dkim/spf-verification, or without.

84// - Messages the user sent to the "message from" address.

85// - Messages matching only the domain of the "message from" address (different

86// localpart), again with verification or without.

87// - Messages sent to an address in the domain of the "message from" address.

88// - The previous two checks again, but now checking against the organizational

89// domain instead of the exact domain.

90// - Matching DKIM domains and a matching SPF mailfrom, or mailfrom domain, or ehlo

91// domain.

92// - "Exact" IP, or nearby IPs.

93//

94// References:

95// ../rfc/5863

96// ../rfc/7960

97// ../rfc/6376:1915

98// ../rfc/6376:3716

99// ../rfc/7208:2167

100func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rconclusive bool, rmethod reputationMethod, rerr error) {

101 boolptr := func(v bool) *bool {

102 return &v

103 }

104 xfalse := boolptr(false)

105 xtrue := boolptr(true)

106

107 type queryError string

108

109 defer func() {

110 x := recover()

111 if x == nil {

112 return

113 }

114 if xerr, ok := x.(queryError); ok {

115 rerr = errors.New(string(xerr))

116 return

117 }

118 panic(x)

119 }()

120

121 now := time.Now()

122

123 // messageQuery returns a base query for historic seen messages to the same

124 // mailbox, at most maxAge old, and at most maxCount messages.

125 messageQuery := func(fm *store.Message, maxAge time.Duration, maxCount int) *bstore.Query[store.Message] {

126 q := bstore.QueryTx[store.Message](tx)

127 q.FilterEqual("MailboxOrigID", m.MailboxID)

128 q.FilterEqual("Expunged", false)

129 q.FilterFn(func(m store.Message) bool {

130 return m.Junk || m.Notjunk

131 })

132 if fm != nil {

133 q.FilterNonzero(*fm)

134 }

135 q.FilterGreaterEqual("Received", now.Add(-maxAge))

136 q.Limit(maxCount)

137 q.SortDesc("Received")

138 return q

139 }

140

141 // Execute the query, returning messages or returning error through panic.

142 xmessageList := func(q *bstore.Query[store.Message], descr string) []store.Message {

143 t0 := time.Now()

144 l, err := q.List()

145 log.Debugx("querying messages for reputation", err,

146 slog.Int("msgs", len(l)),

147 slog.String("descr", descr),

148 slog.Duration("queryduration", time.Since(t0)))

149 if err != nil {

150 panic(queryError(fmt.Sprintf("listing messages: %v", err)))

151 }

152 return l

153 }

154

155 xrecipientExists := func(q *bstore.Query[store.Recipient]) bool {

156 exists, err := q.Exists()

157 if err != nil {

158 panic(queryError(fmt.Sprintf("checking for recipient: %v", err)))

159 }

160 return exists

161 }

162

163 const year = 365 * 24 * time.Hour

164

165 // Look for historic messages with same "message from" address. We'll

166 // treat any validation (strict/dmarc/relaxed) the same, but "none"

167 // separately.

168 //

169 // We only need 1 message, and sometimes look at a second message. If

170 // the last message or the message before was an accept, we accept. If

171 // the single last or last two were a reject, we reject.

172 //

173 // If there was no validation, any signal is inconclusive.

174 if m.MsgFromDomain != "" {

175 q := messageQuery(&store.Message{MsgFromLocalpart: m.MsgFromLocalpart, MsgFromDomain: m.MsgFromDomain}, 3*year, 2)

176 q.FilterEqual("MsgFromValidated", m.MsgFromValidated)

177 msgs := xmessageList(q, "mgsfromfull")

178 if len(msgs) > 0 {

179 // todo: we may want to look at dkim/spf in this case.

180 spam := msgs[0].Junk && (len(msgs) == 1 || msgs[1].Junk)

181 conclusive := m.MsgFromValidated

182 return &spam, conclusive, methodMsgfromFull, nil

183 }

184 if !m.MsgFromValidated {

185 // Look for historic messages that were validated. If present, this is likely spam.

186 // Only return as conclusively spam if history also says this From-address sent

187 // spam.

188 q := messageQuery(&store.Message{MsgFromLocalpart: m.MsgFromLocalpart, MsgFromDomain: m.MsgFromDomain, MsgFromValidated: true}, 3*year, 2)

189 msgs = xmessageList(q, "msgfromfull-validated")

190 if len(msgs) > 0 {

191 spam := msgs[0].Junk && (len(msgs) == 1 || msgs[1].Junk)

192 return xtrue, spam, methodMsgfromFull, nil

193 }

194 }

195

196 // Look if we ever sent to this address. If so, we accept,

197 qr := bstore.QueryTx[store.Recipient](tx)

198 qr.FilterEqual("Localpart", m.MsgFromLocalpart)

199 qr.FilterEqual("Domain", m.MsgFromDomain)

200 qr.FilterGreaterEqual("Sent", now.Add(-3*year))

201 if xrecipientExists(qr) {

202 return xfalse, true, methodMsgtoFull, nil

203 }

204

205 // Look for domain match, then for organizational domain match.

206 for _, orgdomain := range []bool{false, true} {

207 qm := store.Message{}

208 var method reputationMethod

209 var descr string

210 if orgdomain {

211 qm.MsgFromOrgDomain = m.MsgFromOrgDomain

212 method = methodMsgfromOrgDomain

213 descr = "msgfromorgdomain"

214 } else {

215 qm.MsgFromDomain = m.MsgFromDomain

216 method = methodMsgfromDomain

217 descr = "msgfromdomain"

218 }

219

220 q := messageQuery(&qm, 2*year, 20)

221 q.FilterEqual("MsgFromValidated", m.MsgFromValidated)

222 msgs := xmessageList(q, descr)

223 if len(msgs) > 0 {

224 nonjunk := 0

225 for _, m := range msgs {

226 if !m.Junk {

227 nonjunk++

228 }

229 }

230 if 100*nonjunk/len(msgs) > 80 {

231 return xfalse, true, method, nil

232 }

233 if nonjunk == 0 {

234 // Only conclusive with at least 3 different localparts.

235 localparts := map[smtp.Localpart]struct{}{}

236 for _, m := range msgs {

237 localparts[m.MsgFromLocalpart] = struct{}{}

238 if len(localparts) == 3 {

239 return xtrue, true, method, nil

240 }

241 }

242 return xtrue, false, method, nil

243 }

244 // Mixed signals from domain. We don't want to block a new sender.

245 return nil, false, method, nil

246 }

247 if !m.MsgFromValidated {

248 // Look for historic messages that were validated. If present, this is likely spam.

249 // Only return as conclusively spam if history also says this From-address sent

250 // spam.

251 q := messageQuery(&qm, 2*year, 2)

252 q.FilterEqual("MsgFromValidated", true)

253 msgs = xmessageList(q, descr+"-validated")

254 if len(msgs) > 0 {

255 spam := msgs[0].Junk && (len(msgs) == 1 || msgs[1].Junk)

256 return xtrue, spam, method, nil

257 }

258 }

259

260 // Look if we ever sent to this address. If so, we accept,

261 qr := bstore.QueryTx[store.Recipient](tx)

262 if orgdomain {

263 qr.FilterEqual("OrgDomain", m.MsgFromOrgDomain)

264 method = methodMsgtoOrgDomain

265 } else {

266 qr.FilterEqual("Domain", m.MsgFromDomain)

267 method = methodMsgtoDomain

268 }

269 qr.FilterGreaterEqual("Sent", now.Add(-2*year))

270 if xrecipientExists(qr) {

271 return xfalse, true, method, nil

272 }

273 }

274 }

275

276 // DKIM and SPF.

277 // We only use identities that passed validation. Failed identities are ignored. ../rfc/6376:2447

278 // todo future: we could do something with the DKIM identity (i=) field if it is more specific than just the domain (d=).

279 dkimspfsignals := []float64{}

280 dkimspfmsgs := 0

281 for _, dom := range m.DKIMDomains {

282 q := messageQuery(nil, year/2, 50)

283 q.FilterIn("DKIMDomains", dom)

284 msgs := xmessageList(q, "dkimdomain")

285 if len(msgs) > 0 {

286 nspam := 0

287 for _, m := range msgs {

288 if m.Junk {

289 nspam++

290 }

291 }

292 pspam := float64(nspam) / float64(len(msgs))

293 dkimspfsignals = append(dkimspfsignals, pspam)

294 dkimspfmsgs = len(msgs)

295 }

296 }

297 if m.MailFromValidated || m.EHLOValidated {

298 var msgs []store.Message

299 if m.MailFromValidated && m.MailFromDomain != "" {

300 q := messageQuery(&store.Message{MailFromLocalpart: m.MailFromLocalpart, MailFromDomain: m.MailFromDomain}, year/2, 50)

301 msgs = xmessageList(q, "mailfrom")

302 if len(msgs) == 0 {

303 q := messageQuery(&store.Message{MailFromDomain: m.MailFromDomain}, year/2, 50)

304 msgs = xmessageList(q, "mailfromdomain")

305 }

306 }

307 if len(msgs) == 0 && m.EHLOValidated && m.EHLODomain != "" {

308 q := messageQuery(&store.Message{EHLODomain: m.EHLODomain}, year/2, 50)

309 msgs = xmessageList(q, "ehlodomain")

310 }

311 if len(msgs) > 0 {

312 nspam := 0

313 for _, m := range msgs {

314 if m.Junk {

315 nspam++

316 }

317 }

318 pspam := float64(nspam) / float64(len(msgs))

319 dkimspfsignals = append(dkimspfsignals, pspam)

320 if len(msgs) > dkimspfmsgs {

321 dkimspfmsgs = len(msgs)

322 }

323 }

324 }

325 if len(dkimspfsignals) > 0 {

326 var nham, nspam int

327 for _, p := range dkimspfsignals {

328 if p < .1 {

329 nham++

330 } else if p > .9 {

331 nspam++

332 }

333 }

334 if nham > 0 && nspam == 0 {

335 return xfalse, true, methodDKIMSPF, nil

336 }

337 if nspam > 0 && nham == 0 {

338 return xtrue, dkimspfmsgs > 1, methodDKIMSPF, nil

339 }

340 return nil, false, methodDKIMSPF, nil

341 }

342

343 // IP-based. A wider mask needs more messages to be conclusive.

344 // We require the resulting signal to be strong, i.e. likely ham or likely spam.

345 var msgs []store.Message

346 var need int

347 var method reputationMethod

348 if m.RemoteIPMasked1 != "" {

349 q := messageQuery(&store.Message{RemoteIPMasked1: m.RemoteIPMasked1}, year/4, 50)

350 msgs = xmessageList(q, "ip1")

351 need = 2

352 method = methodIP1

353 }

354 if len(msgs) == 0 && m.RemoteIPMasked2 != "" {

355 q := messageQuery(&store.Message{RemoteIPMasked2: m.RemoteIPMasked2}, year/4, 50)

356 msgs = xmessageList(q, "ip2")

357 need = 5

358 method = methodIP2

359 }

360 if len(msgs) == 0 && m.RemoteIPMasked3 != "" {

361 q := messageQuery(&store.Message{RemoteIPMasked3: m.RemoteIPMasked3}, year/4, 50)

362 msgs = xmessageList(q, "ip3")

363 need = 10

364 method = methodIP3

365 }

366 if len(msgs) > 0 {

367 nspam := 0

368 for _, m := range msgs {

369 if m.Junk {

370 nspam++

371 }

372 }

373 pspam := float64(nspam) / float64(len(msgs))

374 var spam *bool

375 if pspam < .25 {

376 spam = xfalse

377 } else if pspam > .75 {

378 spam = xtrue

379 }

380 conclusive := len(msgs) >= need && (pspam <= 0.1 || pspam >= 0.9)

381 return spam, conclusive, method, nil

382 }

383

384 return nil, false, methodNone, nil

385}

386