1package store
2
3import (
4 "archive/tar"
5 "archive/zip"
6 "bufio"
7 "bytes"
8 "context"
9 "fmt"
10 "io"
11 "log/slog"
12 "os"
13 "path/filepath"
14 "strings"
15 "time"
16
17 "github.com/mjl-/bstore"
18
19 "github.com/mjl-/mox/mlog"
20 "github.com/mjl-/mox/mox-"
21)
22
// Archiver can archive multiple mailboxes and their messages.
//
// Implementations in this file write to a tar file, a zip file, a directory in
// the file system, or a single mbox stream.
type Archiver interface {
	// Create adds a file to the archive. Names use forward slashes as separator.
	// If name ends with a slash, it is created as a directory and the returned
	// io.WriteCloser can be ignored. size is the number of bytes the caller is
	// going to write, mtime the modification time to record for the entry. Not
	// all implementations use size and mtime. The returned writer must be closed
	// after writing the contents.
	Create(name string, size int64, mtime time.Time) (io.WriteCloser, error)
	// Close finishes the archive. For TarArchiver and ZipArchiver this is the
	// Close method of the embedded writer, for DirArchiver and MboxArchiver it
	// does nothing.
	Close() error
}
30
31// TarArchiver is an Archiver that writes to a tar file.
32type TarArchiver struct {
33 *tar.Writer
34}
35
36// Create adds a file header to the tar file.
37func (a TarArchiver) Create(name string, size int64, mtime time.Time) (io.WriteCloser, error) {
38 hdr := tar.Header{
39 Name: name,
40 Size: size,
41 Mode: 0660,
42 ModTime: mtime,
43 Format: tar.FormatPAX,
44 }
45 if err := a.WriteHeader(&hdr); err != nil {
46 return nil, err
47 }
48 return nopCloser{a}, nil
49}
50
51// ZipArchiver is an Archiver that writes to a zip file.
52type ZipArchiver struct {
53 *zip.Writer
54}
55
56// Create adds a file header to the zip file.
57func (a ZipArchiver) Create(name string, size int64, mtime time.Time) (io.WriteCloser, error) {
58 hdr := zip.FileHeader{
59 Name: name,
60 Method: zip.Deflate,
61 Modified: mtime,
62 UncompressedSize64: uint64(size),
63 }
64 w, err := a.CreateHeader(&hdr)
65 if err != nil {
66 return nil, err
67 }
68 return nopCloser{w}, nil
69}
70
// nopCloser turns an io.Writer into an io.WriteCloser. Writes go to the
// embedded writer, Close is a no-op.
type nopCloser struct {
	io.Writer
}

// Compile-time check that nopCloser can be returned as an io.WriteCloser.
var _ io.WriteCloser = nopCloser{}

// Close does nothing and always returns nil. The embedded writer stays usable.
func (nopCloser) Close() error {
	return nil
}
79
// DirArchiver is an Archiver that writes to a directory.
type DirArchiver struct {
	Dir string // Directory in the file system below which files and directories are created.
}

// Create creates name in the file system, below a.Dir. Missing parent
// directories are created first.
//
// name must always use forward slashes. If name ends with a slash, a directory
// is created and the returned writer is nil. Otherwise a new file is created
// and returned for writing. Creating a file or directory that already exists is
// an error. The size and mtime parameters are not used.
//
// On error, the returned writer is always nil.
func (a DirArchiver) Create(name string, size int64, mtime time.Time) (io.WriteCloser, error) {
	isdir := strings.HasSuffix(name, "/")
	name = strings.TrimSuffix(name, "/")
	p := filepath.Join(a.Dir, filepath.FromSlash(name))
	if err := os.MkdirAll(filepath.Dir(p), 0770); err != nil {
		return nil, err
	}
	if isdir {
		return nil, os.Mkdir(p, 0770)
	}
	f, err := os.OpenFile(p, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0660)
	if err != nil {
		// Return an untyped nil. Returning the nil *os.File directly would wrap it in
		// a non-nil io.WriteCloser.
		return nil, err
	}
	return f, nil
}

// Close on a dir does nothing.
func (a DirArchiver) Close() error {
	return nil
}
102
103// MboxArchive fakes being an archiver to which a single mbox file can be written.
104// It returns an error when a second file is added. It returns its writer for the
105// first file to be written, leaving parameters unused.
106type MboxArchiver struct {
107 Writer io.Writer
108 have bool
109}
110
111// Create returns the underlying writer for the first call, and an error on later calls.
112func (a *MboxArchiver) Create(name string, size int64, mtime time.Time) (io.WriteCloser, error) {
113 if a.have {
114 return nil, fmt.Errorf("cannot export multiple files with mbox")
115 }
116 a.have = true
117 return nopCloser{a.Writer}, nil
118}
119
120// Close on an mbox archiver does nothing.
121func (a *MboxArchiver) Close() error {
122 return nil
123}
124
125// ExportMessages writes messages to archiver. Either in maildir format, or
126// otherwise in mbox. If mailboxOpt is non-empty, all messages from that mailbox
127// are exported. If messageIDsOpt is non-empty, only those message IDs are exported.
128// If both are empty, all mailboxes and all messages are exported. mailboxOpt
129// and messageIDsOpt cannot both be non-empty.
130//
131// Some errors are not fatal and result in skipped messages. In that happens, a
132// file "errors.txt" is added to the archive describing the errors. The goal is to
133// let users export (hopefully) most messages even in the face of errors.
134func ExportMessages(ctx context.Context, log mlog.Log, db *bstore.DB, accountDir string, archiver Archiver, maildir bool, mailboxOpt string, messageIDsOpt []int64, recursive bool) error {
135 // todo optimize: should prepare next file to add to archive (can be an mbox with many messages) while writing a file to the archive (which typically compresses, which takes time).
136
137 if mailboxOpt != "" && len(messageIDsOpt) != 0 {
138 return fmt.Errorf("cannot have both mailbox and message ids")
139 }
140
141 // Start transaction without closure, we are going to close it early, but don't
142 // want to deal with declaring many variables now to be able to assign them in a
143 // closure and use them afterwards.
144 tx, err := db.Begin(ctx, false)
145 if err != nil {
146 return fmt.Errorf("transaction: %v", err)
147 }
148 defer func() {
149 err := tx.Rollback()
150 log.Check(err, "transaction rollback")
151 }()
152
153 start := time.Now()
154
155 // We keep track of errors reading message files. We continue exporting and add an
156 // errors.txt file to the archive. In case of errors, the user can get (hopefully)
157 // most of their emails, and see something went wrong. For other errors, like
158 // writing to the archiver (e.g. a browser), we abort, because we don't want to
159 // continue with useless work.
160 var errors string
161
162 if messageIDsOpt != nil {
163 var err error
164 errors, err = exportMessages(log, tx, accountDir, messageIDsOpt, archiver, maildir, start)
165 if err != nil {
166 return fmt.Errorf("exporting messages: %v", err)
167 }
168 } else {
169 // Process mailboxes sorted by name, so submaildirs come after their parent.
170 prefix := mailboxOpt + "/"
171 var trimPrefix string
172 if mailboxOpt != "" {
173 // If exporting a specific mailbox, trim its parent path from stored file names.
174 trimPrefix = mox.ParentMailboxName(mailboxOpt) + "/"
175 }
176 q := bstore.QueryTx[Mailbox](tx)
177 q.FilterEqual("Expunged", false)
178 q.FilterFn(func(mb Mailbox) bool {
179 return mailboxOpt == "" || mb.Name == mailboxOpt || recursive && strings.HasPrefix(mb.Name, prefix)
180 })
181 q.SortAsc("Name")
182 err = q.ForEach(func(mb Mailbox) error {
183 mailboxName := mb.Name
184 if trimPrefix != "" {
185 mailboxName = strings.TrimPrefix(mailboxName, trimPrefix)
186 }
187 errmsgs, err := exportMailbox(log, tx, accountDir, mb.ID, mailboxName, archiver, maildir, start)
188 if err != nil {
189 return err
190 }
191 errors += errmsgs
192 return nil
193 })
194 if err != nil {
195 return fmt.Errorf("query mailboxes: %w", err)
196 }
197 }
198
199 if errors != "" {
200 w, err := archiver.Create("errors.txt", int64(len(errors)), time.Now())
201 if err != nil {
202 log.Errorx("adding errors.txt to archive", err)
203 return err
204 }
205 if _, err := w.Write([]byte(errors)); err != nil {
206 log.Errorx("writing errors.txt to archive", err)
207 xerr := w.Close()
208 log.Check(xerr, "closing errors.txt after error")
209 return err
210 }
211 if err := w.Close(); err != nil {
212 return err
213 }
214 }
215 return nil
216}
217
218func exportMessages(log mlog.Log, tx *bstore.Tx, accountDir string, messageIDs []int64, archiver Archiver, maildir bool, start time.Time) (string, error) {
219 mbe, err := newMailboxExport(log, "Export", accountDir, archiver, start, maildir)
220 if err != nil {
221 return "", err
222 }
223 defer mbe.Cleanup()
224
225 for _, id := range messageIDs {
226 m := Message{ID: id}
227 if err := tx.Get(&m); err != nil {
228 mbe.errors += fmt.Sprintf("get message with id %d: %v\n", id, err)
229 continue
230 } else if m.Expunged {
231 mbe.errors += fmt.Sprintf("message with id %d is expunged\n", id)
232 continue
233 }
234 if err := mbe.ExportMessage(m); err != nil {
235 return mbe.errors, err
236 }
237 }
238 err = mbe.Finish()
239 return mbe.errors, err
240}
241
242func exportMailbox(log mlog.Log, tx *bstore.Tx, accountDir string, mailboxID int64, mailboxName string, archiver Archiver, maildir bool, start time.Time) (string, error) {
243 mbe, err := newMailboxExport(log, mailboxName, accountDir, archiver, start, maildir)
244 if err != nil {
245 return "", err
246 }
247 defer mbe.Cleanup()
248
249 // Fetch all messages for mailbox.
250 q := bstore.QueryTx[Message](tx)
251 q.FilterNonzero(Message{MailboxID: mailboxID})
252 q.FilterEqual("Expunged", false)
253 q.SortAsc("Received", "ID")
254 err = q.ForEach(func(m Message) error {
255 return mbe.ExportMessage(m)
256 })
257 if err != nil {
258 return mbe.errors, err
259 }
260 err = mbe.Finish()
261 return mbe.errors, err
262}
263
// maildirFlags hands out single-letter flags for dovecot-keyword-style flags
// that are not in standard maildir. Map holds the index assigned to each
// keyword, List the keywords in order of assignment.
type maildirFlags struct {
	Map  map[string]int
	List []string
}

// newMaildirFlags returns a maildirFlags without any keywords assigned.
func newMaildirFlags() *maildirFlags {
	return &maildirFlags{Map: map[string]int{}}
}

// Flag returns the letter for flag, "a" for the first keyword seen, "b" for the
// second, and so on. A keyword keeps its letter on later calls. Once all 26
// letters are assigned, an empty string is returned for new keywords.
func (f *maildirFlags) Flag(flag string) string {
	if idx, known := f.Map[flag]; known {
		return string(rune('a' + idx))
	}

	next := len(f.Map)
	if next >= 26 {
		// Max 26 flag characters.
		return ""
	}
	f.Map[flag] = next
	f.List = append(f.List, flag)
	return string(rune('a' + next))
}

// Empty returns whether no keyword has been assigned a letter yet.
func (f *maildirFlags) Empty() bool {
	return len(f.Map) == 0
}
291
// mailboxExport holds the state for exporting the messages of one mailbox to an
// archiver, either as maildir or as a single mbox file.
type mailboxExport struct {
	log          mlog.Log
	mailboxName  string        // Name of the mailbox in the archive: maildir directory, or mbox file name without ".mbox".
	accountDir   string        // Message files are read from below the "msg" directory in accountDir.
	archiver     Archiver      // Destination of the export.
	start        time.Time     // Used as mtime for the maildir directories and the dovecot-keywords file.
	maildir      bool          // If true, export as maildir, otherwise as mbox.
	maildirFlags *maildirFlags // Letters for non-standard flags. Only set for maildir.
	mboxtmp      *os.File      // Temporary file collecting the mbox contents. Only set for mbox, nil again after a successful Finish.
	mboxwriter   *bufio.Writer // Buffered writer to mboxtmp.
	errors       string        // Newline-terminated descriptions of non-fatal errors.
}
304
305func (e *mailboxExport) Cleanup() {
306 if e.mboxtmp != nil {
307 CloseRemoveTempFile(e.log, e.mboxtmp, "mbox")
308 }
309}
310
311func newMailboxExport(log mlog.Log, mailboxName, accountDir string, archiver Archiver, start time.Time, maildir bool) (*mailboxExport, error) {
312 mbe := mailboxExport{
313 log: log,
314 mailboxName: mailboxName,
315 accountDir: accountDir,
316 archiver: archiver,
317 start: start,
318 maildir: maildir,
319 }
320 if maildir {
321 // Create the directories that show this is a maildir.
322 mbe.maildirFlags = newMaildirFlags()
323 if _, err := archiver.Create(mailboxName+"/new/", 0, start); err != nil {
324 return nil, fmt.Errorf("adding maildir new directory: %v", err)
325 }
326 if _, err := archiver.Create(mailboxName+"/cur/", 0, start); err != nil {
327 return nil, fmt.Errorf("adding maildir cur directory: %v", err)
328 }
329 if _, err := archiver.Create(mailboxName+"/tmp/", 0, start); err != nil {
330 return nil, fmt.Errorf("adding maildir tmp directory: %v", err)
331 }
332 } else {
333 var err error
334 mbe.mboxtmp, err = os.CreateTemp("", "mox-mail-export-mbox")
335 if err != nil {
336 return nil, fmt.Errorf("creating temp mbox file: %v", err)
337 }
338 mbe.mboxwriter = bufio.NewWriter(mbe.mboxtmp)
339 }
340
341 return &mbe, nil
342}
343
344func (e *mailboxExport) ExportMessage(m Message) error {
345 mp := filepath.Join(e.accountDir, "msg", MessagePath(m.ID))
346 var mr io.ReadCloser
347 if m.Size == int64(len(m.MsgPrefix)) {
348 mr = io.NopCloser(bytes.NewReader(m.MsgPrefix))
349 } else {
350 mf, err := os.Open(mp)
351 if err != nil {
352 e.errors += fmt.Sprintf("open message file for id %d, path %s: %v (message skipped)\n", m.ID, mp, err)
353 return nil
354 }
355 defer func() {
356 err := mf.Close()
357 e.log.Check(err, "closing message file after export")
358 }()
359 st, err := mf.Stat()
360 if err != nil {
361 e.errors += fmt.Sprintf("stat message file for id %d, path %s: %v (message skipped)\n", m.ID, mp, err)
362 return nil
363 }
364 size := st.Size() + int64(len(m.MsgPrefix))
365 if size != m.Size {
366 e.errors += fmt.Sprintf("message size mismatch for message id %d, database has %d, size is %d+%d=%d, using calculated size\n", m.ID, m.Size, len(m.MsgPrefix), st.Size(), size)
367 }
368 mr = FileMsgReader(m.MsgPrefix, mf)
369 }
370
371 if e.maildir {
372 p := e.mailboxName
373 if m.Flags.Seen {
374 p = filepath.Join(p, "cur")
375 } else {
376 p = filepath.Join(p, "new")
377 }
378 name := fmt.Sprintf("%d.%d.mox:2,", m.Received.Unix(), m.ID)
379
380 // Standard flags. May need to be sorted.
381 if m.Flags.Draft {
382 name += "D"
383 }
384 if m.Flags.Flagged {
385 name += "F"
386 }
387 if m.Flags.Answered {
388 name += "R"
389 }
390 if m.Flags.Seen {
391 name += "S"
392 }
393 if m.Flags.Deleted {
394 name += "T"
395 }
396
397 // Non-standard flag. We set them with a dovecot-keywords file.
398 if m.Flags.Forwarded {
399 name += e.maildirFlags.Flag("$Forwarded")
400 }
401 if m.Flags.Junk {
402 name += e.maildirFlags.Flag("$Junk")
403 }
404 if m.Flags.Notjunk {
405 name += e.maildirFlags.Flag("$NotJunk")
406 }
407 if m.Flags.Phishing {
408 name += e.maildirFlags.Flag("$Phishing")
409 }
410 if m.Flags.MDNSent {
411 name += e.maildirFlags.Flag("$MDNSent")
412 }
413
414 p = filepath.Join(p, name)
415
416 // We store messages with \r\n, maildir needs without. But we need to know the
417 // final size. So first convert, then create file with size, and write from buffer.
418 // todo: for large messages, we should go through a temporary file instead of memory.
419 var dst bytes.Buffer
420 r := bufio.NewReader(mr)
421 for {
422 line, rerr := r.ReadBytes('\n')
423 if rerr != io.EOF && rerr != nil {
424 e.errors += fmt.Sprintf("reading from message for id %d: %v (message skipped)\n", m.ID, rerr)
425 return nil
426 }
427 if len(line) > 0 {
428 if bytes.HasSuffix(line, []byte("\r\n")) {
429 line = line[:len(line)-1]
430 line[len(line)-1] = '\n'
431 }
432 if _, err := dst.Write(line); err != nil {
433 return fmt.Errorf("writing message: %v", err)
434 }
435 }
436 if rerr == io.EOF {
437 break
438 }
439 }
440 size := int64(dst.Len())
441 w, err := e.archiver.Create(p, size, m.Received)
442 if err != nil {
443 return fmt.Errorf("adding message to archive: %v", err)
444 }
445 if _, err := io.Copy(w, &dst); err != nil {
446 xerr := w.Close()
447 e.log.Check(xerr, "closing message")
448 return fmt.Errorf("copying message to archive: %v", err)
449 }
450 return w.Close()
451 }
452
453 mailfrom := "mox"
454 if m.MailFrom != "" {
455 mailfrom = m.MailFrom
456 }
457 // ../rfc/4155:80
458 if _, err := fmt.Fprintf(e.mboxwriter, "From %s %s\n", mailfrom, m.Received.Format(time.ANSIC)); err != nil {
459 return fmt.Errorf("write message line to mbox temp file: %v", err)
460 }
461
462 // Write message flags in the three headers that mbox consumers may (or may not) understand.
463 if m.Seen {
464 if _, err := fmt.Fprintf(e.mboxwriter, "Status: R\n"); err != nil {
465 return fmt.Errorf("writing status header: %v", err)
466 }
467 }
468 xstatus := ""
469 if m.Answered {
470 xstatus += "A"
471 }
472 if m.Flagged {
473 xstatus += "F"
474 }
475 if m.Draft {
476 xstatus += "T"
477 }
478 if m.Deleted {
479 xstatus += "D"
480 }
481 if xstatus != "" {
482 if _, err := fmt.Fprintf(e.mboxwriter, "X-Status: %s\n", xstatus); err != nil {
483 return fmt.Errorf("writing x-status header: %v", err)
484 }
485 }
486 var xkeywords []string
487 if m.Forwarded {
488 xkeywords = append(xkeywords, "$Forwarded")
489 }
490 if m.Junk && !m.Notjunk {
491 xkeywords = append(xkeywords, "$Junk")
492 }
493 if m.Notjunk && !m.Junk {
494 xkeywords = append(xkeywords, "$NotJunk")
495 }
496 if m.Phishing {
497 xkeywords = append(xkeywords, "$Phishing")
498 }
499 if m.MDNSent {
500 xkeywords = append(xkeywords, "$MDNSent")
501 }
502 if len(xkeywords) > 0 {
503 if _, err := fmt.Fprintf(e.mboxwriter, "X-Keywords: %s\n", strings.Join(xkeywords, ",")); err != nil {
504 return fmt.Errorf("writing x-keywords header: %v", err)
505 }
506 }
507
508 // ../rfc/4155:365 todo: rewrite messages to be 7-bit. still useful nowadays?
509
510 header := true
511 r := bufio.NewReader(mr)
512 for {
513 line, rerr := r.ReadBytes('\n')
514 if rerr != io.EOF && rerr != nil {
515 return fmt.Errorf("reading message: %v", rerr)
516 }
517 if len(line) > 0 {
518 // ../rfc/4155:354
519 if bytes.HasSuffix(line, []byte("\r\n")) {
520 line = line[:len(line)-1]
521 line[len(line)-1] = '\n'
522 }
523 if header && len(line) == 1 {
524 header = false
525 }
526 if header {
527 // Skip any previously stored flag-holding or now incorrect content-length headers.
528 // This assumes these headers are just a single line.
529 switch strings.ToLower(string(bytes.SplitN(line, []byte(":"), 2)[0])) {
530 case "status", "x-status", "x-keywords", "content-length":
531 continue
532 }
533 }
534 // ../rfc/4155:119
535 if bytes.HasPrefix(bytes.TrimLeft(line, ">"), []byte("From ")) {
536 if _, err := fmt.Fprint(e.mboxwriter, ">"); err != nil {
537 return fmt.Errorf("writing escaping >: %v", err)
538 }
539 }
540 if _, err := e.mboxwriter.Write(line); err != nil {
541 return fmt.Errorf("writing line: %v", err)
542 }
543 }
544 if rerr == io.EOF {
545 break
546 }
547 }
548 // ../rfc/4155:75
549 if _, err := fmt.Fprint(e.mboxwriter, "\n"); err != nil {
550 return fmt.Errorf("writing end of message newline: %v", err)
551 }
552 return nil
553}
554
555func (e *mailboxExport) Finish() error {
556 if e.maildir {
557 if e.maildirFlags.Empty() {
558 return nil
559 }
560
561 var b bytes.Buffer
562 for i, flag := range e.maildirFlags.List {
563 if _, err := fmt.Fprintf(&b, "%d %s\n", i, flag); err != nil {
564 return err
565 }
566 }
567 w, err := e.archiver.Create(e.mailboxName+"/dovecot-keywords", int64(b.Len()), e.start)
568 if err != nil {
569 return fmt.Errorf("adding dovecot-keywords: %v", err)
570 }
571 if _, err := w.Write(b.Bytes()); err != nil {
572 xerr := w.Close()
573 e.log.Check(xerr, "closing dovecot-keywords file after closing")
574 return fmt.Errorf("writing dovecot-keywords: %v", err)
575 }
576 return w.Close()
577 }
578
579 if err := e.mboxwriter.Flush(); err != nil {
580 return fmt.Errorf("flush mbox writer: %v", err)
581 }
582 fi, err := e.mboxtmp.Stat()
583 if err != nil {
584 return fmt.Errorf("stat temporary mbox file: %v", err)
585 }
586 if _, err := e.mboxtmp.Seek(0, 0); err != nil {
587 return fmt.Errorf("seek to start of temporary mbox file")
588 }
589 w, err := e.archiver.Create(e.mailboxName+".mbox", fi.Size(), fi.ModTime())
590 if err != nil {
591 return fmt.Errorf("add mbox to archive: %v", err)
592 }
593 if _, err := io.Copy(w, e.mboxtmp); err != nil {
594 xerr := w.Close()
595 e.log.Check(xerr, "closing mbox message file after error")
596 return fmt.Errorf("copying temp mbox file to archive: %v", err)
597 }
598 if err := w.Close(); err != nil {
599 return fmt.Errorf("closing message file: %v", err)
600 }
601 name := e.mboxtmp.Name()
602 err = e.mboxtmp.Close()
603 e.log.Check(err, "closing temporary mbox file")
604 err = os.Remove(name)
605 e.log.Check(err, "removing temporary mbox file", slog.String("path", name))
606 e.mboxwriter = nil
607 e.mboxtmp = nil
608 return nil
609}
610