1package imapserver
2
3import (
4 "fmt"
5 "net/textproto"
6 "strings"
7
8 "golang.org/x/exp/slog"
9
10 "github.com/mjl-/bstore"
11
12 "github.com/mjl-/mox/message"
13 "github.com/mjl-/mox/store"
14)
15
16// Search returns messages matching criteria specified in parameters.
17//
18// State: Selected
19func (c *conn) cmdxSearch(isUID bool, tag, cmd string, p *parser) {
20 // Command: ../rfc/9051:3716 ../rfc/4731:31 ../rfc/4466:354 ../rfc/3501:2723
21 // Examples: ../rfc/9051:3986 ../rfc/4731:153 ../rfc/3501:2975
22 // Syntax: ../rfc/9051:6918 ../rfc/4466:611 ../rfc/3501:4954
23
24 // We will respond with ESEARCH instead of SEARCH if "RETURN" is present or for IMAP4rev2.
25 var eargs map[string]bool // Options except SAVE. Nil means old-style SEARCH response.
26 var save bool // For SAVE option. Kept separately for easier handling of MIN/MAX later.
27
28 // IMAP4rev2 always returns ESEARCH, even with absent RETURN.
29 if c.enabled[capIMAP4rev2] {
30 eargs = map[string]bool{}
31 }
32 // ../rfc/9051:6967
33 if p.take(" RETURN (") {
34 eargs = map[string]bool{}
35
36 for !p.take(")") {
37 if len(eargs) > 0 || save {
38 p.xspace()
39 }
40 if w, ok := p.takelist("MIN", "MAX", "ALL", "COUNT", "SAVE"); ok {
41 if w == "SAVE" {
42 save = true
43 } else {
44 eargs[w] = true
45 }
46 } else {
47 // ../rfc/4466:378 ../rfc/9051:3745
48 xsyntaxErrorf("ESEARCH result option %q not supported", w)
49 }
50 }
51 }
52 // ../rfc/4731:149 ../rfc/9051:3737
53 if eargs != nil && len(eargs) == 0 && !save {
54 eargs["ALL"] = true
55 }
56
57 // If UTF8=ACCEPT is enabled, we should not accept any charset. We are a bit more
58 // relaxed (reasonable?) and still allow US-ASCII and UTF-8. ../rfc/6855:198
59 if p.take(" CHARSET ") {
60 charset := strings.ToUpper(p.xastring())
61 if charset != "US-ASCII" && charset != "UTF-8" {
62 // ../rfc/3501:2771 ../rfc/9051:3836
63 xusercodeErrorf("BADCHARSET", "only US-ASCII and UTF-8 supported")
64 }
65 }
66 p.xspace()
67 sk := &searchKey{
68 searchKeys: []searchKey{*p.xsearchKey()},
69 }
70 for !p.empty() {
71 p.xspace()
72 sk.searchKeys = append(sk.searchKeys, *p.xsearchKey())
73 }
74
75 // Even in case of error, we ensure search result is changed.
76 if save {
77 c.searchResult = []store.UID{}
78 }
79
80 // We gather word and not-word searches from the top-level, turn them
81 // into a WordSearch for a more efficient search.
82 // todo optimize: also gather them out of AND searches.
83 var textWords, textNotWords, bodyWords, bodyNotWords []string
84 n := 0
85 for _, xsk := range sk.searchKeys {
86 switch xsk.op {
87 case "BODY":
88 bodyWords = append(bodyWords, xsk.astring)
89 continue
90 case "TEXT":
91 textWords = append(textWords, xsk.astring)
92 continue
93 case "NOT":
94 switch xsk.searchKey.op {
95 case "BODY":
96 bodyNotWords = append(bodyNotWords, xsk.searchKey.astring)
97 continue
98 case "TEXT":
99 textNotWords = append(textNotWords, xsk.searchKey.astring)
100 continue
101 }
102 }
103 sk.searchKeys[n] = xsk
104 n++
105 }
106 // We may be left with an empty but non-nil sk.searchKeys, which is important for
107 // matching.
108 sk.searchKeys = sk.searchKeys[:n]
109 var bodySearch, textSearch *store.WordSearch
110 if len(bodyWords) > 0 || len(bodyNotWords) > 0 {
111 ws := store.PrepareWordSearch(bodyWords, bodyNotWords)
112 bodySearch = &ws
113 }
114 if len(textWords) > 0 || len(textNotWords) > 0 {
115 ws := store.PrepareWordSearch(textWords, textNotWords)
116 textSearch = &ws
117 }
118
119 // Note: we only hold the account rlock for verifying the mailbox at the start.
120 c.account.RLock()
121 runlock := c.account.RUnlock
122 // Note: in a defer because we replace it below.
123 defer func() {
124 runlock()
125 }()
126
127 // If we only have a MIN and/or MAX, we can stop processing as soon as we
128 // have those matches.
129 var min, max int
130 if eargs["MIN"] {
131 min = 1
132 }
133 if eargs["MAX"] {
134 max = 1
135 }
136
137 var expungeIssued bool
138 var maxModSeq store.ModSeq
139
140 var uids []store.UID
141 c.xdbread(func(tx *bstore.Tx) {
142 c.xmailboxID(tx, c.mailboxID) // Validate.
143 runlock()
144 runlock = func() {}
145
146 // Normal forward search when we don't have MAX only.
147 var lastIndex = -1
148 if eargs == nil || max == 0 || len(eargs) != 1 {
149 for i, uid := range c.uids {
150 lastIndex = i
151 if match, modseq := c.searchMatch(tx, msgseq(i+1), uid, *sk, bodySearch, textSearch, &expungeIssued); match {
152 uids = append(uids, uid)
153 if modseq > maxModSeq {
154 maxModSeq = modseq
155 }
156 if min == 1 && min+max == len(eargs) {
157 break
158 }
159 }
160 }
161 }
162 // And reverse search for MAX if we have only MAX or MAX combined with MIN.
163 if max == 1 && (len(eargs) == 1 || min+max == len(eargs)) {
164 for i := len(c.uids) - 1; i > lastIndex; i-- {
165 if match, modseq := c.searchMatch(tx, msgseq(i+1), c.uids[i], *sk, bodySearch, textSearch, &expungeIssued); match {
166 uids = append(uids, c.uids[i])
167 if modseq > maxModSeq {
168 maxModSeq = modseq
169 }
170 break
171 }
172 }
173 }
174 })
175
176 if eargs == nil {
177 // In IMAP4rev1, an untagged SEARCH response is required. ../rfc/3501:2728
178 if len(uids) == 0 {
179 c.bwritelinef("* SEARCH")
180 }
181
182 // Old-style SEARCH response. We must spell out each number. So we may be splitting
183 // into multiple responses. ../rfc/9051:6809 ../rfc/3501:4833
184 for len(uids) > 0 {
185 n := len(uids)
186 if n > 100 {
187 n = 100
188 }
189 s := ""
190 for _, v := range uids[:n] {
191 if !isUID {
192 v = store.UID(c.xsequence(v))
193 }
194 s += " " + fmt.Sprintf("%d", v)
195 }
196
197 // Since we don't have the max modseq for the possibly partial uid range we're
198 // writing here within hand reach, we conveniently interpret the ambiguous "for all
199 // messages being returned" in ../rfc/7162:1107 as meaning over all lines that we
200 // write. And that clients only commit this value after they have seen the tagged
201 // end of the command. Appears to be recommended behaviour, ../rfc/7162:2323.
202 // ../rfc/7162:1077 ../rfc/7162:1101
203 var modseq string
204 if sk.hasModseq() {
205 // ../rfc/7162:2557
206 modseq = fmt.Sprintf(" (MODSEQ %d)", maxModSeq.Client())
207 }
208
209 c.bwritelinef("* SEARCH%s%s", s, modseq)
210 uids = uids[n:]
211 }
212 } else {
213 // New-style ESEARCH response syntax: ../rfc/9051:6546 ../rfc/4466:522
214
215 if save {
216 // ../rfc/9051:3784 ../rfc/5182:13
217 c.searchResult = uids
218 if sanityChecks {
219 checkUIDs(c.searchResult)
220 }
221 }
222
223 // No untagged ESEARCH response if nothing was requested. ../rfc/9051:4160
224 if len(eargs) > 0 {
225 // The tag was originally a string, became an astring in IMAP4rev2, better stick to
226 // string. ../rfc/4466:707 ../rfc/5259:1163 ../rfc/9051:7087
227 resp := fmt.Sprintf(`* ESEARCH (TAG "%s")`, tag)
228 if isUID {
229 resp += " UID"
230 }
231
232 // NOTE: we are converting UIDs to msgseq in the uids slice (if needed) while
233 // keeping the "uids" name!
234 if !isUID {
235 // If searchResult is hanging on to the slice, we need to work on a copy.
236 if save {
237 nuids := make([]store.UID, len(uids))
238 copy(nuids, uids)
239 uids = nuids
240 }
241 for i, uid := range uids {
242 uids[i] = store.UID(c.xsequence(uid))
243 }
244 }
245
246 // If no matches, then no MIN/MAX response. ../rfc/4731:98 ../rfc/9051:3758
247 if eargs["MIN"] && len(uids) > 0 {
248 resp += fmt.Sprintf(" MIN %d", uids[0])
249 }
250 if eargs["MAX"] && len(uids) > 0 {
251 resp += fmt.Sprintf(" MAX %d", uids[len(uids)-1])
252 }
253 if eargs["COUNT"] {
254 resp += fmt.Sprintf(" COUNT %d", len(uids))
255 }
256 if eargs["ALL"] && len(uids) > 0 {
257 resp += fmt.Sprintf(" ALL %s", compactUIDSet(uids).String())
258 }
259
260 // Interaction between ESEARCH and CONDSTORE: ../rfc/7162:1211 ../rfc/4731:273
261 // Summary: send the highest modseq of the returned messages.
262 if sk.hasModseq() && len(uids) > 0 {
263 resp += fmt.Sprintf(" MODSEQ %d", maxModSeq.Client())
264 }
265
266 c.bwritelinef("%s", resp)
267 }
268 }
269 if expungeIssued {
270 // ../rfc/9051:5102
271 c.writeresultf("%s OK [EXPUNGEISSUED] done", tag)
272 } else {
273 c.ok(tag, cmd)
274 }
275}
276
277type search struct {
278 c *conn
279 tx *bstore.Tx
280 seq msgseq
281 uid store.UID
282 mr *store.MsgReader
283 m store.Message
284 p *message.Part
285 expungeIssued *bool
286 hasModseq bool
287}
288
289func (c *conn) searchMatch(tx *bstore.Tx, seq msgseq, uid store.UID, sk searchKey, bodySearch, textSearch *store.WordSearch, expungeIssued *bool) (bool, store.ModSeq) {
290 s := search{c: c, tx: tx, seq: seq, uid: uid, expungeIssued: expungeIssued, hasModseq: sk.hasModseq()}
291 defer func() {
292 if s.mr != nil {
293 err := s.mr.Close()
294 c.xsanity(err, "closing messagereader")
295 s.mr = nil
296 }
297 }()
298 return s.match(sk, bodySearch, textSearch)
299}
300
301func (s *search) match(sk searchKey, bodySearch, textSearch *store.WordSearch) (match bool, modseq store.ModSeq) {
302 // Instead of littering all the cases in match0 with calls to get modseq, we do it once
303 // here in case of a match.
304 defer func() {
305 if match && s.hasModseq {
306 if s.m.ID == 0 {
307 match = s.xensureMessage()
308 }
309 modseq = s.m.ModSeq
310 }
311 }()
312
313 match = s.match0(sk)
314 if match && bodySearch != nil {
315 if !s.xensurePart() {
316 match = false
317 return
318 }
319 var err error
320 match, err = bodySearch.MatchPart(s.c.log, s.p, false)
321 xcheckf(err, "search words in bodies")
322 }
323 if match && textSearch != nil {
324 if !s.xensurePart() {
325 match = false
326 return
327 }
328 var err error
329 match, err = textSearch.MatchPart(s.c.log, s.p, true)
330 xcheckf(err, "search words in headers and bodies")
331 }
332 return
333}
334
335func (s *search) xensureMessage() bool {
336 if s.m.ID > 0 {
337 return true
338 }
339
340 q := bstore.QueryTx[store.Message](s.tx)
341 q.FilterNonzero(store.Message{MailboxID: s.c.mailboxID, UID: s.uid})
342 m, err := q.Get()
343 if err == bstore.ErrAbsent || err == nil && m.Expunged {
344 // ../rfc/2180:607
345 *s.expungeIssued = true
346 return false
347 }
348 xcheckf(err, "get message")
349 s.m = m
350 return true
351}
352
353// ensure message, reader and part are loaded. returns whether that was
354// successful.
355func (s *search) xensurePart() bool {
356 if s.mr != nil {
357 return s.p != nil
358 }
359
360 if !s.xensureMessage() {
361 return false
362 }
363
364 // Closed by searchMatch after all (recursive) search.match calls are finished.
365 s.mr = s.c.account.MessageReader(s.m)
366
367 if s.m.ParsedBuf == nil {
368 s.c.log.Error("missing parsed message")
369 return false
370 }
371 p, err := s.m.LoadPart(s.mr)
372 xcheckf(err, "load parsed message")
373 s.p = &p
374 return true
375}
376
377func (s *search) match0(sk searchKey) bool {
378 c := s.c
379
380 // Difference between sk.searchKeys nil and length 0 is important. Because we take
381 // out word/notword searches, the list may be empty but non-nil.
382 if sk.searchKeys != nil {
383 for _, ssk := range sk.searchKeys {
384 if !s.match0(ssk) {
385 return false
386 }
387 }
388 return true
389 } else if sk.seqSet != nil {
390 return sk.seqSet.containsSeq(s.seq, c.uids, c.searchResult)
391 }
392
393 filterHeader := func(field, value string) bool {
394 lower := strings.ToLower(value)
395 h, err := s.p.Header()
396 if err != nil {
397 c.log.Debugx("parsing message header", err, slog.Any("uid", s.uid))
398 return false
399 }
400 for _, v := range h.Values(field) {
401 if strings.Contains(strings.ToLower(v), lower) {
402 return true
403 }
404 }
405 return false
406 }
407
408 // We handle ops by groups that need increasing details about the message.
409
410 switch sk.op {
411 case "ALL":
412 return true
413 case "NEW":
414 // We do not implement the RECENT flag, so messages cannot be NEW.
415 return false
416 case "OLD":
417 // We treat all messages as non-recent, so this means all messages.
418 return true
419 case "RECENT":
420 // We do not implement the RECENT flag. All messages are not recent.
421 return false
422 case "NOT":
423 return !s.match0(*sk.searchKey)
424 case "OR":
425 return s.match0(*sk.searchKey) || s.match0(*sk.searchKey2)
426 case "UID":
427 return sk.uidSet.containsUID(s.uid, c.uids, c.searchResult)
428 }
429
430 // Parsed part.
431 if !s.xensurePart() {
432 return false
433 }
434
435 // Parsed message, basic info.
436 switch sk.op {
437 case "ANSWERED":
438 return s.m.Answered
439 case "DELETED":
440 return s.m.Deleted
441 case "FLAGGED":
442 return s.m.Flagged
443 case "KEYWORD":
444 kw := strings.ToLower(sk.atom)
445 switch kw {
446 case "$forwarded":
447 return s.m.Forwarded
448 case "$junk":
449 return s.m.Junk
450 case "$notjunk":
451 return s.m.Notjunk
452 case "$phishing":
453 return s.m.Phishing
454 case "$mdnsent":
455 return s.m.MDNSent
456 default:
457 for _, k := range s.m.Keywords {
458 if k == kw {
459 return true
460 }
461 }
462 return false
463 }
464 case "SEEN":
465 return s.m.Seen
466 case "UNANSWERED":
467 return !s.m.Answered
468 case "UNDELETED":
469 return !s.m.Deleted
470 case "UNFLAGGED":
471 return !s.m.Flagged
472 case "UNKEYWORD":
473 kw := strings.ToLower(sk.atom)
474 switch kw {
475 case "$forwarded":
476 return !s.m.Forwarded
477 case "$junk":
478 return !s.m.Junk
479 case "$notjunk":
480 return !s.m.Notjunk
481 case "$phishing":
482 return !s.m.Phishing
483 case "$mdnsent":
484 return !s.m.MDNSent
485 default:
486 for _, k := range s.m.Keywords {
487 if k == kw {
488 return false
489 }
490 }
491 return true
492 }
493 case "UNSEEN":
494 return !s.m.Seen
495 case "DRAFT":
496 return s.m.Draft
497 case "UNDRAFT":
498 return !s.m.Draft
499 case "BEFORE", "ON", "SINCE":
500 skdt := sk.date.Format("2006-01-02")
501 rdt := s.m.Received.Format("2006-01-02")
502 switch sk.op {
503 case "BEFORE":
504 return rdt < skdt
505 case "ON":
506 return rdt == skdt
507 case "SINCE":
508 return rdt >= skdt
509 }
510 panic("missing case")
511 case "LARGER":
512 return s.m.Size > sk.number
513 case "SMALLER":
514 return s.m.Size < sk.number
515 case "MODSEQ":
516 // ../rfc/7162:1045
517 return s.m.ModSeq.Client() >= *sk.clientModseq
518 }
519
520 if s.p == nil {
521 c.log.Info("missing parsed message, not matching", slog.Any("uid", s.uid))
522 return false
523 }
524
525 // Parsed message, more info.
526 switch sk.op {
527 case "BCC":
528 return filterHeader("Bcc", sk.astring)
529 case "BODY", "TEXT":
530 // We gathered word/notword searches from the top-level, but we can also get them
531 // nested.
532 // todo optimize: handle deeper nested word/not-word searches more efficiently.
533 headerToo := sk.op == "TEXT"
534 match, err := store.PrepareWordSearch([]string{sk.astring}, nil).MatchPart(s.c.log, s.p, headerToo)
535 xcheckf(err, "word search")
536 return match
537 case "CC":
538 return filterHeader("Cc", sk.astring)
539 case "FROM":
540 return filterHeader("From", sk.astring)
541 case "SUBJECT":
542 return filterHeader("Subject", sk.astring)
543 case "TO":
544 return filterHeader("To", sk.astring)
545 case "HEADER":
546 // ../rfc/9051:3895
547 lower := strings.ToLower(sk.astring)
548 h, err := s.p.Header()
549 if err != nil {
550 c.log.Errorx("parsing header for search", err, slog.Any("uid", s.uid))
551 return false
552 }
553 k := textproto.CanonicalMIMEHeaderKey(sk.headerField)
554 for _, v := range h.Values(k) {
555 if lower == "" || strings.Contains(strings.ToLower(v), lower) {
556 return true
557 }
558 }
559 return false
560 case "SENTBEFORE", "SENTON", "SENTSINCE":
561 if s.p.Envelope == nil || s.p.Envelope.Date.IsZero() {
562 return false
563 }
564 dt := s.p.Envelope.Date.Format("2006-01-02")
565 skdt := sk.date.Format("2006-01-02")
566 switch sk.op {
567 case "SENTBEFORE":
568 return dt < skdt
569 case "SENTON":
570 return dt == skdt
571 case "SENTSINCE":
572 return dt > skdt
573 }
574 panic("missing case")
575 }
576 panic(serverError{fmt.Errorf("missing case for search key op %q", sk.op)})
577}
578