1package smtpserver

3import (

4 "errors"

5 "fmt"

6 "time"

8 "golang.org/x/exp/slog"

10 "github.com/mjl-/bstore"

12 "github.com/mjl-/mox/mlog"

13 "github.com/mjl-/mox/smtp"

14 "github.com/mjl-/mox/store"

15)

// reputationMethod names the kind of historic evidence that a decision by
// reputation was based on.
type reputationMethod string

const (
	// History for the full "message from" address: earlier messages from it,
	// or messages we sent to it.
	methodMsgfromFull reputationMethod = "msgfromfull"
	methodMsgtoFull   reputationMethod = "msgtofull"

	// History for the domain, or organizational domain, of the "message from"
	// address: earlier messages from it, or messages we sent to it.
	methodMsgfromDomain    reputationMethod = "msgfromdomain"
	methodMsgfromOrgDomain reputationMethod = "msgfromorgdomain"
	methodMsgtoDomain      reputationMethod = "msgtodomain"
	methodMsgtoOrgDomain   reputationMethod = "msgtoorgdomain"

	// History for DKIM domains and SPF identities (mailfrom, ehlo).
	methodDKIMSPF reputationMethod = "dkimspf"

	// History for the remote IP, with increasingly wide masks.
	methodIP1 reputationMethod = "ip1"
	methodIP2 reputationMethod = "ip2"
	methodIP3 reputationMethod = "ip3"

	// No matching history was found.
	methodNone reputationMethod = "none"
)

33// Reputation returns whether message m is likely junk.

34//

35// This function is called after checking for a manually configured spf mailfrom

36// allow (e.g. for mailing lists), and after checking for a dmarc reject policy.

37//

38// The decision is made based on historic messages delivered to the same

39// destination mailbox, MailboxOrigID. Because each mailbox may have a different

40// accept policy. We only use messages that have been marked as either junk or

41// non-junk. We help users by automatically marking them as non-junk when moving to

42// certain folders in the default config (e.g. the archive folder). We expect users

43// to mark junk messages as such when they read it. And to keep it in their inbox,

44// regular trash or archive if it is not.

45//

46// The basic idea is to keep accepting messages that were accepted in the past, and

47// keep rejecting those that were rejected. This is relatively easy to check if

48// mail passes SPF and/or DKIM with Message-From alignment. Regular email from

49// known people will be let in. But spammers are trickier. They will use new IPs,

50// (sub)domains, no or newly created SPF and/or DKIM identifiers, new localparts,

51// etc. This function likely ends up returning "inconclusive" for such emails. The

52// junkfilter will have to take care of a final decision.

53//

54// In case of doubt, it doesn't hurt much to accept another mail that a user has

55// communicated successfully with in the past. If the most recent message is marked

56// as junk that could have happened accidentally. If another message is let in, and

57// it is again junk, future messages will be rejected.

58//

59// Actual spammers will probably try to use identifiers, i.e. (sub)domain, dkim/spf

60// identifiers and ip addresses for which we have no history. We may only have

61// ip-based reputation, perhaps only an ip range, perhaps nothing.

62//

63// Some profiles of first-time senders:

64//

65// - Individuals. They can typically get past the junkfilter if needed.

66// - Transactional emails. They should get past the junkfilter. If they use one of

67// the larger email service providers, their reputation could help. If the

68// junkfilter rejects the message, users can recover the message from the Rejects

69// mailbox. The first message is typically initiated by a user, e.g. by registering.

70// - Desired commercial email will have to get past the junkfilter based on its

71// content. There will typically be earlier communication with the (organizational)

72// domain that would let the message through.

73// - Mailing list. May get past the junkfilter. If delivery is to a separate

74// mailbox, the junkfilter will let it in because of little history. Long enough to

75// build reputation based on DKIM/SPF signals. Users are best off to

76// configure accept rules for messages from mailing lists.

77//

78// The decision-making process looks at historic messages. The following properties

79// are checked until matching messages are found. If they are found, a decision is

80// returned, which may be inconclusive. The next property on the list is only

81// checked if a step did not match any messages.

82//

83// - Messages matching full "message from" address, either with strict/relaxed

84// dkim/spf-verification, or without.

85// - Messages the user sent to the "message from" address.

86// - Messages matching only the domain of the "message from" address (different

87// localpart), again with verification or without.

88// - Messages sent to an address in the domain of the "message from" address.

89// - The previous two checks again, but now checking against the organizational

90// domain instead of the exact domain.

91// - Matching DKIM domains and a matching SPF mailfrom, or mailfrom domain, or ehlo

92// domain.

93// - "Exact" IP, or nearby IPs.

94//

95// References:

96// ../rfc/5863

97// ../rfc/7960

98// ../rfc/6376:1915

99// ../rfc/6376:3716

100// ../rfc/7208:2167

101func reputation(tx *bstore.Tx, log mlog.Log, m *store.Message) (rjunk *bool, rconclusive bool, rmethod reputationMethod, rerr error) {

102 boolptr := func(v bool) *bool {

103 return &v

104 }

105 xfalse := boolptr(false)

106 xtrue := boolptr(true)

107

108 type queryError string

109

110 defer func() {

111 x := recover()

112 if x == nil {

113 return

114 }

115 if xerr, ok := x.(queryError); ok {

116 rerr = errors.New(string(xerr))

117 return

118 }

119 panic(x)

120 }()

121

122 now := time.Now()

123

124 // messageQuery returns a base query for historic seen messages to the same

125 // mailbox, at most maxAge old, and at most maxCount messages.

126 messageQuery := func(fm *store.Message, maxAge time.Duration, maxCount int) *bstore.Query[store.Message] {

127 q := bstore.QueryTx[store.Message](tx)

128 q.FilterEqual("MailboxOrigID", m.MailboxID)

129 q.FilterEqual("Expunged", false)

130 q.FilterFn(func(m store.Message) bool {

131 return m.Junk || m.Notjunk

132 })

133 if fm != nil {

134 q.FilterNonzero(*fm)

135 }

136 q.FilterGreaterEqual("Received", now.Add(-maxAge))

137 q.Limit(maxCount)

138 q.SortDesc("Received")

139 return q

140 }

141

142 // Execute the query, returning messages or returning error through panic.

143 xmessageList := func(q *bstore.Query[store.Message], descr string) []store.Message {

144 t0 := time.Now()

145 l, err := q.List()

146 log.Debugx("querying messages for reputation", err,

147 slog.Int("msgs", len(l)),

148 slog.String("descr", descr),

149 slog.Duration("queryduration", time.Since(t0)))

150 if err != nil {

151 panic(queryError(fmt.Sprintf("listing messages: %v", err)))

152 }

153 return l

154 }

155

156 xrecipientExists := func(q *bstore.Query[store.Recipient]) bool {

157 exists, err := q.Exists()

158 if err != nil {

159 panic(queryError(fmt.Sprintf("checking for recipient: %v", err)))

160 }

161 return exists

162 }

163

164 const year = 365 * 24 * time.Hour

165

166 // Look for historic messages with same "message from" address. We'll

167 // treat any validation (strict/dmarc/relaxed) the same, but "none"

168 // separately.

169 //

170 // We only need 1 message, and sometimes look at a second message. If

171 // the last message or the message before was an accept, we accept. If

172 // the single last or last two were a reject, we reject.

173 //

174 // If there was no validation, any signal is inconclusive.

175 if m.MsgFromDomain != "" {

176 q := messageQuery(&store.Message{MsgFromLocalpart: m.MsgFromLocalpart, MsgFromDomain: m.MsgFromDomain}, 3*year, 2)

177 q.FilterEqual("MsgFromValidated", m.MsgFromValidated)

178 msgs := xmessageList(q, "mgsfromfull")

179 if len(msgs) > 0 {

180 // todo: we may want to look at dkim/spf in this case.

181 spam := msgs[0].Junk && (len(msgs) == 1 || msgs[1].Junk)

182 conclusive := m.MsgFromValidated

183 return &spam, conclusive, methodMsgfromFull, nil

184 }

185 if !m.MsgFromValidated {

186 // Look for historic messages that were validated. If present, this is likely spam.

187 // Only return as conclusively spam if history also says this From-address sent

188 // spam.

189 q := messageQuery(&store.Message{MsgFromLocalpart: m.MsgFromLocalpart, MsgFromDomain: m.MsgFromDomain, MsgFromValidated: true}, 3*year, 2)

190 msgs = xmessageList(q, "msgfromfull-validated")

191 if len(msgs) > 0 {

192 spam := msgs[0].Junk && (len(msgs) == 1 || msgs[1].Junk)

193 return xtrue, spam, methodMsgfromFull, nil

194 }

195 }

196

197 // Look if we ever sent to this address. If so, we accept,

198 qr := bstore.QueryTx[store.Recipient](tx)

199 qr.FilterEqual("Localpart", m.MsgFromLocalpart)

200 qr.FilterEqual("Domain", m.MsgFromDomain)

201 qr.FilterGreaterEqual("Sent", now.Add(-3*year))

202 if xrecipientExists(qr) {

203 return xfalse, true, methodMsgtoFull, nil

204 }

205

206 // Look for domain match, then for organizational domain match.

207 for _, orgdomain := range []bool{false, true} {

208 qm := store.Message{}

209 var method reputationMethod

210 var descr string

211 if orgdomain {

212 qm.MsgFromOrgDomain = m.MsgFromOrgDomain

213 method = methodMsgfromOrgDomain

214 descr = "msgfromorgdomain"

215 } else {

216 qm.MsgFromDomain = m.MsgFromDomain

217 method = methodMsgfromDomain

218 descr = "msgfromdomain"

219 }

220

221 q := messageQuery(&qm, 2*year, 20)

222 q.FilterEqual("MsgFromValidated", m.MsgFromValidated)

223 msgs := xmessageList(q, descr)

224 if len(msgs) > 0 {

225 nonjunk := 0

226 for _, m := range msgs {

227 if !m.Junk {

228 nonjunk++

229 }

230 }

231 if 100*nonjunk/len(msgs) > 80 {

232 return xfalse, true, method, nil

233 }

234 if nonjunk == 0 {

235 // Only conclusive with at least 3 different localparts.

236 localparts := map[smtp.Localpart]struct{}{}

237 for _, m := range msgs {

238 localparts[m.MsgFromLocalpart] = struct{}{}

239 if len(localparts) == 3 {

240 return xtrue, true, method, nil

241 }

242 }

243 return xtrue, false, method, nil

244 }

245 // Mixed signals from domain. We don't want to block a new sender.

246 return nil, false, method, nil

247 }

248 if !m.MsgFromValidated {

249 // Look for historic messages that were validated. If present, this is likely spam.

250 // Only return as conclusively spam if history also says this From-address sent

251 // spam.

252 q := messageQuery(&qm, 2*year, 2)

253 q.FilterEqual("MsgFromValidated", true)

254 msgs = xmessageList(q, descr+"-validated")

255 if len(msgs) > 0 {

256 spam := msgs[0].Junk && (len(msgs) == 1 || msgs[1].Junk)

257 return xtrue, spam, method, nil

258 }

259 }

260

261 // Look if we ever sent to this address. If so, we accept,

262 qr := bstore.QueryTx[store.Recipient](tx)

263 if orgdomain {

264 qr.FilterEqual("OrgDomain", m.MsgFromOrgDomain)

265 method = methodMsgtoOrgDomain

266 } else {

267 qr.FilterEqual("Domain", m.MsgFromDomain)

268 method = methodMsgtoDomain

269 }

270 qr.FilterGreaterEqual("Sent", now.Add(-2*year))

271 if xrecipientExists(qr) {

272 return xfalse, true, method, nil

273 }

274 }

275 }

276

277 // DKIM and SPF.

278 // We only use identities that passed validation. Failed identities are ignored. ../rfc/6376:2447

279 // todo future: we could do something with the DKIM identity (i=) field if it is more specific than just the domain (d=).

280 dkimspfsignals := []float64{}

281 dkimspfmsgs := 0

282 for _, dom := range m.DKIMDomains {

283 q := messageQuery(nil, year/2, 50)

284 q.FilterIn("DKIMDomains", dom)

285 msgs := xmessageList(q, "dkimdomain")

286 if len(msgs) > 0 {

287 nspam := 0

288 for _, m := range msgs {

289 if m.Junk {

290 nspam++

291 }

292 }

293 pspam := float64(nspam) / float64(len(msgs))

294 dkimspfsignals = append(dkimspfsignals, pspam)

295 dkimspfmsgs = len(msgs)

296 }

297 }

298 if m.MailFromValidated || m.EHLOValidated {

299 var msgs []store.Message

300 if m.MailFromValidated && m.MailFromDomain != "" {

301 q := messageQuery(&store.Message{MailFromLocalpart: m.MailFromLocalpart, MailFromDomain: m.MailFromDomain}, year/2, 50)

302 msgs = xmessageList(q, "mailfrom")

303 if len(msgs) == 0 {

304 q := messageQuery(&store.Message{MailFromDomain: m.MailFromDomain}, year/2, 50)

305 msgs = xmessageList(q, "mailfromdomain")

306 }

307 }

308 if len(msgs) == 0 && m.EHLOValidated && m.EHLODomain != "" {

309 q := messageQuery(&store.Message{EHLODomain: m.EHLODomain}, year/2, 50)

310 msgs = xmessageList(q, "ehlodomain")

311 }

312 if len(msgs) > 0 {

313 nspam := 0

314 for _, m := range msgs {

315 if m.Junk {

316 nspam++

317 }

318 }

319 pspam := float64(nspam) / float64(len(msgs))

320 dkimspfsignals = append(dkimspfsignals, pspam)

321 if len(msgs) > dkimspfmsgs {

322 dkimspfmsgs = len(msgs)

323 }

324 }

325 }

326 if len(dkimspfsignals) > 0 {

327 var nham, nspam int

328 for _, p := range dkimspfsignals {

329 if p < .1 {

330 nham++

331 } else if p > .9 {

332 nspam++

333 }

334 }

335 if nham > 0 && nspam == 0 {

336 return xfalse, true, methodDKIMSPF, nil

337 }

338 if nspam > 0 && nham == 0 {

339 return xtrue, dkimspfmsgs > 1, methodDKIMSPF, nil

340 }

341 return nil, false, methodDKIMSPF, nil

342 }

343

344 // IP-based. A wider mask needs more messages to be conclusive.

345 // We require the resulting signal to be strong, i.e. likely ham or likely spam.

346 var msgs []store.Message

347 var need int

348 var method reputationMethod

349 if m.RemoteIPMasked1 != "" {

350 q := messageQuery(&store.Message{RemoteIPMasked1: m.RemoteIPMasked1}, year/4, 50)

351 msgs = xmessageList(q, "ip1")

352 need = 2

353 method = methodIP1

354 }

355 if len(msgs) == 0 && m.RemoteIPMasked2 != "" {

356 q := messageQuery(&store.Message{RemoteIPMasked2: m.RemoteIPMasked2}, year/4, 50)

357 msgs = xmessageList(q, "ip2")

358 need = 5

359 method = methodIP2

360 }

361 if len(msgs) == 0 && m.RemoteIPMasked3 != "" {

362 q := messageQuery(&store.Message{RemoteIPMasked3: m.RemoteIPMasked3}, year/4, 50)

363 msgs = xmessageList(q, "ip3")

364 need = 10

365 method = methodIP3

366 }

367 if len(msgs) > 0 {

368 nspam := 0

369 for _, m := range msgs {

370 if m.Junk {

371 nspam++

372 }

373 }

374 pspam := float64(nspam) / float64(len(msgs))

375 var spam *bool

376 if pspam < .25 {

377 spam = xfalse

378 } else if pspam > .75 {

379 spam = xtrue

380 }

381 conclusive := len(msgs) >= need && (pspam <= 0.1 || pspam >= 0.9)

382 return spam, conclusive, method, nil

383 }

384

385 return nil, false, methodNone, nil

386}

387