1package store
2
3import (
4 "bufio"
5 "bytes"
6 "errors"
7 "fmt"
8 "io"
9 "log/slog"
10 "os"
11 "path/filepath"
12 "strconv"
13 "strings"
14 "time"
15
16 "golang.org/x/exp/maps"
17
18 "github.com/mjl-/mox/mlog"
19)
20
// MsgSource is implemented by readers for mailbox file formats.
type MsgSource interface {
	// Return next message, or io.EOF when there are no more.
	//
	// The returned file holds the message data; the readers in this file return a
	// newly created temporary file that the caller is responsible for. The string
	// identifies where the message came from (a "file:line" position or a path)
	// and can also be set when an error is returned.
	Next() (*Message, *os.File, string, error)
}
26
// MboxReader reads messages from an mbox file, implementing MsgSource.
type MboxReader struct {
	log mlog.Log
	// Called by Next to create the temporary file each message is written to.
	createTemp func(log mlog.Log, pattern string) (*os.File, error)
	// File name as passed to NewMboxReader, used by Position.
	path string
	// Line number reported by Position. Starts at 1 and is incremented for each
	// line read.
	line int
	r    *bufio.Reader
	// Whether the last line written to a message was empty. A "From "-line only
	// starts a new message when it follows an empty line.
	prevempty bool
	// Set once the initial "From "-line of the file has been read.
	nonfirst bool
	// Set when the end of the input was reached, further calls to Next return io.EOF.
	eof      bool
	fromLine string // "From "-line for this message.
	header   bool   // Now in header section.
}
40
41func NewMboxReader(log mlog.Log, createTemp func(log mlog.Log, pattern string) (*os.File, error), filename string, r io.Reader) *MboxReader {
42 return &MboxReader{
43 log: log,
44 createTemp: createTemp,
45 path: filename,
46 line: 1,
47 r: bufio.NewReader(r),
48 }
49}
50
51// Position returns "<filename>:<lineno>" for the current position.
52func (mr *MboxReader) Position() string {
53 return fmt.Sprintf("%s:%d", mr.path, mr.line)
54}
55
56// Next returns the next message read from the mbox file. The file is a temporary
57// file and must be removed/consumed. The third return value is the position in the
58// file.
59func (mr *MboxReader) Next() (*Message, *os.File, string, error) {
60 if mr.eof {
61 return nil, nil, "", io.EOF
62 }
63
64 from := []byte("From ")
65
66 if !mr.nonfirst {
67 mr.header = true
68 // First read, we're at the beginning of the file.
69 line, err := mr.r.ReadBytes('\n')
70 if err == io.EOF {
71 return nil, nil, "", io.EOF
72 }
73 mr.line++
74
75 if !bytes.HasPrefix(line, from) {
76 return nil, nil, mr.Position(), fmt.Errorf(`first line does not start with "From "`)
77 }
78 mr.nonfirst = true
79 mr.fromLine = strings.TrimSpace(string(line))
80 }
81
82 f, err := mr.createTemp(mr.log, "mboxreader")
83 if err != nil {
84 return nil, nil, mr.Position(), err
85 }
86 defer func() {
87 if f != nil {
88 CloseRemoveTempFile(mr.log, f, "message after mbox read error")
89 }
90 }()
91
92 fromLine := mr.fromLine
93 bf := bufio.NewWriter(f)
94 var flags Flags
95 keywords := map[string]bool{}
96 var size int64
97 for {
98 line, err := mr.r.ReadBytes('\n')
99 if err != nil && err != io.EOF {
100 return nil, nil, mr.Position(), fmt.Errorf("reading from mbox: %v", err)
101 }
102 if len(line) > 0 {
103 mr.line++
104 // We store data with crlf, adjust any imported messages with bare newlines.
105 if !bytes.HasSuffix(line, []byte("\r\n")) {
106 line = append(line[:len(line)-1], "\r\n"...)
107 }
108
109 if mr.header {
110 // See https://doc.dovecot.org/admin_manual/mailbox_formats/mbox/
111 if bytes.HasPrefix(line, []byte("Status:")) {
112 s := strings.TrimSpace(strings.SplitN(string(line), ":", 2)[1])
113 for _, c := range s {
114 switch c {
115 case 'R':
116 flags.Seen = true
117 }
118 }
119 } else if bytes.HasPrefix(line, []byte("X-Status:")) {
120 s := strings.TrimSpace(strings.SplitN(string(line), ":", 2)[1])
121 for _, c := range s {
122 switch c {
123 case 'A':
124 flags.Answered = true
125 case 'F':
126 flags.Flagged = true
127 case 'T':
128 flags.Draft = true
129 case 'D':
130 flags.Deleted = true
131 }
132 }
133 } else if bytes.HasPrefix(line, []byte("X-Keywords:")) {
134 s := strings.TrimSpace(strings.SplitN(string(line), ":", 2)[1])
135 for _, t := range strings.Split(s, ",") {
136 word := strings.ToLower(strings.TrimSpace(t))
137 switch word {
138 case "forwarded", "$forwarded":
139 flags.Forwarded = true
140 case "junk", "$junk":
141 flags.Junk = true
142 case "notjunk", "$notjunk", "nonjunk", "$nonjunk":
143 flags.Notjunk = true
144 case "phishing", "$phishing":
145 flags.Phishing = true
146 case "mdnsent", "$mdnsent":
147 flags.MDNSent = true
148 default:
149 if err := CheckKeyword(word); err == nil {
150 keywords[word] = true
151 }
152 }
153 }
154 }
155 }
156 if bytes.Equal(line, []byte("\r\n")) {
157 mr.header = false
158 }
159
160 // Next mail message starts at bare From word.
161 if mr.prevempty && bytes.HasPrefix(line, from) {
162 mr.fromLine = strings.TrimSpace(string(line))
163 mr.header = true
164 break
165 }
166 if bytes.HasPrefix(line, []byte(">")) && bytes.HasPrefix(bytes.TrimLeft(line, ">"), []byte("From ")) {
167 line = line[1:]
168 }
169 n, err := bf.Write(line)
170 if err != nil {
171 return nil, nil, mr.Position(), fmt.Errorf("writing message to file: %v", err)
172 }
173 size += int64(n)
174 mr.prevempty = bytes.Equal(line, []byte("\r\n"))
175 }
176 if err == io.EOF {
177 mr.eof = true
178 break
179 }
180 }
181 if err := bf.Flush(); err != nil {
182 return nil, nil, mr.Position(), fmt.Errorf("flush: %v", err)
183 }
184
185 m := &Message{Flags: flags, Keywords: maps.Keys(keywords), Size: size}
186
187 if t := strings.SplitN(fromLine, " ", 3); len(t) == 3 {
188 layouts := []string{time.ANSIC, time.UnixDate, time.RubyDate}
189 for _, l := range layouts {
190 t, err := time.Parse(l, t[2])
191 if err == nil {
192 m.Received = t
193 break
194 }
195 }
196 }
197
198 // Prevent cleanup by defer.
199 mf := f
200 f = nil
201
202 return m, mf, mr.Position(), nil
203}
204
// MaildirReader reads messages from a maildir. Its Next method has the signature
// required by MsgSource.
type MaildirReader struct {
	log mlog.Log
	// Called by Next to create the temporary file each message is copied to.
	createTemp func(log mlog.Log, pattern string) (*os.File, error)
	// Opened directories to read message files from.
	newf, curf *os.File
	f          *os.File // File we are currently reading from. We first read newf, then curf.
	dir        string   // Name of directory for f. Can be empty on first call.
	// Directory entries read from f that have not been returned yet. Refilled in
	// batches by Next.
	entries      []os.DirEntry
	dovecotFlags []string // Lower-case flags/keywords.
}
214
215func NewMaildirReader(log mlog.Log, createTemp func(log mlog.Log, pattern string) (*os.File, error), newf, curf *os.File) *MaildirReader {
216 mr := &MaildirReader{
217 log: log,
218 createTemp: createTemp,
219 newf: newf,
220 curf: curf,
221 f: newf,
222 }
223
224 // Best-effort parsing of dovecot keywords.
225 kf, err := os.Open(filepath.Join(filepath.Dir(newf.Name()), "dovecot-keywords"))
226 if err == nil {
227 mr.dovecotFlags, err = ParseDovecotKeywordsFlags(kf, log)
228 log.Check(err, "parsing dovecot keywords file")
229 err = kf.Close()
230 log.Check(err, "closing dovecot-keywords file")
231 }
232
233 return mr
234}
235
236func (mr *MaildirReader) Next() (*Message, *os.File, string, error) {
237 if mr.dir == "" {
238 mr.dir = mr.f.Name()
239 }
240
241 if len(mr.entries) == 0 {
242 var err error
243 mr.entries, err = mr.f.ReadDir(100)
244 if err != nil && err != io.EOF {
245 return nil, nil, "", err
246 }
247 if len(mr.entries) == 0 {
248 if mr.f == mr.curf {
249 return nil, nil, "", io.EOF
250 }
251 mr.f = mr.curf
252 mr.dir = ""
253 return mr.Next()
254 }
255 }
256
257 p := filepath.Join(mr.dir, mr.entries[0].Name())
258 mr.entries = mr.entries[1:]
259 sf, err := os.Open(p)
260 if err != nil {
261 return nil, nil, p, fmt.Errorf("open message in maildir: %s", err)
262 }
263 defer func() {
264 err := sf.Close()
265 mr.log.Check(err, "closing message file after error")
266 }()
267 f, err := mr.createTemp(mr.log, "maildirreader")
268 if err != nil {
269 return nil, nil, p, err
270 }
271 defer func() {
272 if f != nil {
273 name := f.Name()
274 err := f.Close()
275 mr.log.Check(err, "closing temporary message file after maildir read error")
276 err = os.Remove(name)
277 mr.log.Check(err, "removing temporary message file after maildir read error", slog.String("path", name))
278 }
279 }()
280
281 // Copy data, changing bare \n into \r\n.
282 r := bufio.NewReader(sf)
283 w := bufio.NewWriter(f)
284 var size int64
285 for {
286 line, err := r.ReadBytes('\n')
287 if err != nil && err != io.EOF {
288 return nil, nil, p, fmt.Errorf("reading message: %v", err)
289 }
290 if len(line) > 0 {
291 if !bytes.HasSuffix(line, []byte("\r\n")) {
292 line = append(line[:len(line)-1], "\r\n"...)
293 }
294
295 if n, err := w.Write(line); err != nil {
296 return nil, nil, p, fmt.Errorf("writing message: %v", err)
297 } else {
298 size += int64(n)
299 }
300 }
301 if err == io.EOF {
302 break
303 }
304 }
305 if err := w.Flush(); err != nil {
306 return nil, nil, p, fmt.Errorf("writing message: %v", err)
307 }
308
309 // Take received time from filename, falling back to mtime for maildirs
310 // reconstructed some other sources of message files.
311 var received time.Time
312 t := strings.SplitN(filepath.Base(sf.Name()), ".", 3)
313 if v, err := strconv.ParseInt(t[0], 10, 64); len(t) == 3 && err == nil {
314 received = time.Unix(v, 0)
315 } else if fi, err := sf.Stat(); err == nil {
316 received = fi.ModTime()
317 }
318
319 // Parse flags. See https://cr.yp.to/proto/maildir.html.
320 flags := Flags{}
321 keywords := map[string]bool{}
322 t = strings.SplitN(filepath.Base(sf.Name()), ":2,", 2)
323 if len(t) == 2 {
324 for _, c := range t[1] {
325 switch c {
326 case 'P':
327 // Passed, doesn't map to a common IMAP flag.
328 case 'R':
329 flags.Answered = true
330 case 'S':
331 flags.Seen = true
332 case 'T':
333 flags.Deleted = true
334 case 'D':
335 flags.Draft = true
336 case 'F':
337 flags.Flagged = true
338 default:
339 if c >= 'a' && c <= 'z' {
340 index := int(c - 'a')
341 if index >= len(mr.dovecotFlags) {
342 continue
343 }
344 kw := mr.dovecotFlags[index]
345 switch kw {
346 case "$forwarded", "forwarded":
347 flags.Forwarded = true
348 case "$junk", "junk":
349 flags.Junk = true
350 case "$notjunk", "notjunk", "nonjunk":
351 flags.Notjunk = true
352 case "$mdnsent", "mdnsent":
353 flags.MDNSent = true
354 case "$phishing", "phishing":
355 flags.Phishing = true
356 default:
357 keywords[kw] = true
358 }
359 }
360 }
361 }
362 }
363
364 m := &Message{Received: received, Flags: flags, Keywords: maps.Keys(keywords), Size: size}
365
366 // Prevent cleanup by defer.
367 mf := f
368 f = nil
369
370 return m, mf, p, nil
371}
372
373// ParseDovecotKeywordsFlags attempts to parse a dovecot-keywords file. It only
374// returns valid flags/keywords, as lower-case. If an error is encountered and
375// returned, any keywords that were found are still returned. The returned list has
376// both system/well-known flags and custom keywords.
377func ParseDovecotKeywordsFlags(r io.Reader, log mlog.Log) ([]string, error) {
378 /*
379 If the dovecot-keywords file is present, we parse its additional flags, see
380 https://doc.dovecot.org/admin_manual/mailbox_formats/maildir/
381
382 0 Old
383 1 Junk
384 2 NonJunk
385 3 $Forwarded
386 4 $Junk
387 */
388 keywords := make([]string, 26)
389 end := 0
390 scanner := bufio.NewScanner(r)
391 var errs []string
392 for scanner.Scan() {
393 s := scanner.Text()
394 t := strings.SplitN(s, " ", 2)
395 if len(t) != 2 {
396 errs = append(errs, fmt.Sprintf("unexpected dovecot keyword line: %q", s))
397 continue
398 }
399 v, err := strconv.ParseInt(t[0], 10, 32)
400 if err != nil {
401 errs = append(errs, fmt.Sprintf("unexpected dovecot keyword index: %q", s))
402 continue
403 }
404 if v < 0 || v >= int64(len(keywords)) {
405 errs = append(errs, fmt.Sprintf("dovecot keyword index too big: %q", s))
406 continue
407 }
408 index := int(v)
409 if keywords[index] != "" {
410 errs = append(errs, fmt.Sprintf("duplicate dovecot keyword: %q", s))
411 continue
412 }
413 kw := strings.ToLower(t[1])
414 if !systemWellKnownFlags[kw] {
415 if err := CheckKeyword(kw); err != nil {
416 errs = append(errs, fmt.Sprintf("invalid keyword %q", kw))
417 continue
418 }
419 }
420 keywords[index] = kw
421 if index >= end {
422 end = index + 1
423 }
424 }
425 if err := scanner.Err(); err != nil {
426 errs = append(errs, fmt.Sprintf("reading dovecot keywords file: %v", err))
427 }
428 var err error
429 if len(errs) > 0 {
430 err = errors.New(strings.Join(errs, "; "))
431 }
432 return keywords[:end], err
433}
434