1package message
2
3import (
4 "bytes"
5 "errors"
6 "io"
7 "log"
8 "os"
9 "path/filepath"
10 "reflect"
11 "strings"
12 "testing"
13
14 "github.com/mjl-/mox/mlog"
15)
16
// pkglog is the logger used by the tests in this file; its Logger field is
// passed to the parsing functions under test.
var pkglog = mlog.New("message", nil)
18
// tcheck aborts the test when err is non-nil, reporting msg as context for
// the failure. It does nothing when err is nil.
func tcheck(t *testing.T, err error, msg string) {
	t.Helper()
	if err == nil {
		return
	}
	t.Fatalf("%s: %s", msg, err)
}
25
// tcompare aborts the test unless got and exp are equal according to
// reflect.DeepEqual.
func tcompare(t *testing.T, got, exp any) {
	t.Helper()
	if reflect.DeepEqual(got, exp) {
		return
	}
	t.Fatalf("got %v, expected %v", got, exp)
}
32
// tfail checks err against the expected error expErr and aborts the test on a
// mismatch. When expErr is nil, err must be nil too. Otherwise err must be
// non-nil and match expErr according to errors.Is.
func tfail(t *testing.T, err, expErr error) {
	t.Helper()
	switch {
	case expErr == nil && err == nil:
		return
	case expErr != nil && err != nil && errors.Is(err, expErr):
		return
	}
	t.Fatalf("got err %v, expected %v", err, expErr)
}
39
40func TestEmptyHeader(t *testing.T) {
41 s := "\r\nx"
42 p, err := EnsurePart(pkglog.Logger, true, strings.NewReader(s), int64(len(s)))
43 tcheck(t, err, "parse empty headers")
44 buf, err := io.ReadAll(p.Reader())
45 tcheck(t, err, "read")
46 expBody := "x"
47 tcompare(t, string(buf), expBody)
48 tcompare(t, p.MediaType, "")
49 tcompare(t, p.MediaSubType, "")
50}
51
52func TestBadContentType(t *testing.T) {
53 expBody := "test"
54
55 // Pedantic is like strict.
56 Pedantic = true
57 s := "content-type: text/html;;\r\n\r\ntest"
58 p, err := EnsurePart(pkglog.Logger, false, strings.NewReader(s), int64(len(s)))
59 tfail(t, err, ErrBadContentType)
60 buf, err := io.ReadAll(p.Reader())
61 tcheck(t, err, "read")
62 tcompare(t, string(buf), expBody)
63 tcompare(t, p.MediaType, "APPLICATION")
64 tcompare(t, p.MediaSubType, "OCTET-STREAM")
65 Pedantic = false
66
67 // Strict
68 s = "content-type: text/html;;\r\n\r\ntest"
69 p, err = EnsurePart(pkglog.Logger, true, strings.NewReader(s), int64(len(s)))
70 tfail(t, err, ErrBadContentType)
71 buf, err = io.ReadAll(p.Reader())
72 tcheck(t, err, "read")
73 tcompare(t, string(buf), expBody)
74 tcompare(t, p.MediaType, "APPLICATION")
75 tcompare(t, p.MediaSubType, "OCTET-STREAM")
76
77 // Non-strict but unrecoverable content-type.
78 s = "content-type: not a content type;;\r\n\r\ntest"
79 p, err = EnsurePart(pkglog.Logger, false, strings.NewReader(s), int64(len(s)))
80 tcheck(t, err, "parsing message with bad but recoverable content-type")
81 buf, err = io.ReadAll(p.Reader())
82 tcheck(t, err, "read")
83 tcompare(t, string(buf), expBody)
84 tcompare(t, p.MediaType, "APPLICATION")
85 tcompare(t, p.MediaSubType, "OCTET-STREAM")
86
87 // We try to use only the content-type, typically better than application/octet-stream.
88 s = "content-type: text/html;;\r\n\r\ntest"
89 p, err = EnsurePart(pkglog.Logger, false, strings.NewReader(s), int64(len(s)))
90 tcheck(t, err, "parsing message with bad but recoverable content-type")
91 buf, err = io.ReadAll(p.Reader())
92 tcheck(t, err, "read")
93 tcompare(t, string(buf), expBody)
94 tcompare(t, p.MediaType, "TEXT")
95 tcompare(t, p.MediaSubType, "HTML")
96
97 // Not recovering multipart, we won't have a boundary.
98 s = "content-type: multipart/mixed;;\r\n\r\ntest"
99 p, err = EnsurePart(pkglog.Logger, false, strings.NewReader(s), int64(len(s)))
100 tcheck(t, err, "parsing message with bad but recoverable content-type")
101 buf, err = io.ReadAll(p.Reader())
102 tcheck(t, err, "read")
103 tcompare(t, string(buf), expBody)
104 tcompare(t, p.MediaType, "APPLICATION")
105 tcompare(t, p.MediaSubType, "OCTET-STREAM")
106}
107
108func TestBareCR(t *testing.T) {
109 s := "content-type: text/html\r\n\r\nbare\rcr\r\n"
110 expBody := "bare\rcr\r\n"
111
112 // Pedantic is like strict.
113 Pedantic = true
114 p, err := EnsurePart(pkglog.Logger, false, strings.NewReader(s), int64(len(s)))
115 tfail(t, err, errBareCR)
116 _, err = io.ReadAll(p.Reader())
117 tfail(t, err, errBareCR)
118 Pedantic = false
119
120 // Strict.
121 p, err = EnsurePart(pkglog.Logger, true, strings.NewReader(s), int64(len(s)))
122 tfail(t, err, errBareCR)
123 _, err = io.ReadAll(p.Reader())
124 tcheck(t, err, "read fallback part without error")
125
126 // Non-strict allows bare cr.
127 p, err = EnsurePart(pkglog.Logger, false, strings.NewReader(s), int64(len(s)))
128 tcheck(t, err, "parse")
129 buf, err := io.ReadAll(p.Reader())
130 tcheck(t, err, "read")
131 tcompare(t, string(buf), expBody)
132}
133
// basicMsg is a small single-part text/plain message whose body is the single
// base64-encoded line "aGkK". The literal is written with LF line endings,
// which are replaced by CRLF.
var basicMsg = strings.ReplaceAll(`From: <mjl@mox.example>
Content-Type: text/plain
Content-Transfer-Encoding: base64

aGkK
`, "\n", "\r\n")
140
141func TestBasic(t *testing.T) {
142 r := strings.NewReader(basicMsg)
143 p, err := Parse(pkglog.Logger, true, r)
144 tcheck(t, err, "new reader")
145
146 buf, err := io.ReadAll(p.RawReader())
147 tcheck(t, err, "read raw")
148 expBody := "aGkK\r\n"
149 tcompare(t, string(buf), expBody)
150
151 buf, err = io.ReadAll(p.Reader())
152 tcheck(t, err, "read decoded")
153 tcompare(t, string(buf), "hi\r\n")
154
155 if p.RawLineCount != 1 {
156 t.Fatalf("basic message, got %d lines, expected 1", p.RawLineCount)
157 }
158 if size := p.EndOffset - p.BodyOffset; size != int64(len(expBody)) {
159 t.Fatalf("basic message, got size %d, expected %d", size, len(expBody))
160 }
161}
162
// basicMsg2 is a plain text message without content-transfer-encoding, with a
// two-line body of which the second line is empty. The literal is written
// with LF line endings, which are replaced by CRLF.
//
// From ../rfc/3501:2589
var basicMsg2 = strings.ReplaceAll(`Date: Mon, 7 Feb 1994 21:52:25 -0800 (PST)
From: Fred Foobar <foobar@Blurdybloop.example>
Subject: afternoon meeting
To: mooch@owatagu.siam.edu.example
Message-Id: <B27397-0100000@Blurdybloop.example>
MIME-Version: 1.0
Content-Type: TEXT/PLAIN; CHARSET=US-ASCII

Hello Joe, do you think we can meet at 3:30 tomorrow?

`, "\n", "\r\n")
175
176func TestBasic2(t *testing.T) {
177 r := strings.NewReader(basicMsg2)
178 p, err := Parse(pkglog.Logger, true, r)
179 tcheck(t, err, "new reader")
180
181 buf, err := io.ReadAll(p.RawReader())
182 tcheck(t, err, "read raw")
183 expBody := "Hello Joe, do you think we can meet at 3:30 tomorrow?\r\n\r\n"
184 tcompare(t, string(buf), expBody)
185
186 buf, err = io.ReadAll(p.Reader())
187 tcheck(t, err, "read decoded")
188 tcompare(t, string(buf), expBody)
189
190 if p.RawLineCount != 2 {
191 t.Fatalf("basic message, got %d lines, expected 2", p.RawLineCount)
192 }
193 if size := p.EndOffset - p.BodyOffset; size != int64(len(expBody)) {
194 t.Fatalf("basic message, got size %d, expected %d", size, len(expBody))
195 }
196
197 r = strings.NewReader(basicMsg2)
198 p, err = Parse(pkglog.Logger, true, r)
199 tcheck(t, err, "new reader")
200 err = p.Walk(pkglog.Logger, nil)
201 tcheck(t, err, "walk")
202 if p.RawLineCount != 2 {
203 t.Fatalf("basic message, got %d lines, expected 2", p.RawLineCount)
204 }
205 if size := p.EndOffset - p.BodyOffset; size != int64(len(expBody)) {
206 t.Fatalf("basic message, got size %d, expected %d", size, len(expBody))
207 }
208}
209
// mimeMsg is a multipart/mixed message with boundary "simple boundary". It has
// a preamble, a first part without headers, a second part with an explicit
// text/plain content-type, and an epilogue. The literal is written with LF
// line endings, which are replaced by CRLF.
var mimeMsg = strings.ReplaceAll(`From: Nathaniel Borenstein <nsb@bellcore.com>
To: Ned Freed <ned@innosoft.com>
Date: Sun, 21 Mar 1993 23:56:48 -0800 (PST)
Subject: Sample message
MIME-Version: 1.0
Content-type: multipart/mixed; boundary="simple boundary"

This is the preamble. It is to be ignored, though it
is a handy place for composition agents to include an
explanatory note to non-MIME conformant readers.

--simple boundary

This is implicitly typed plain US-ASCII text.
It does NOT end with a linebreak.
--simple boundary
Content-type: text/plain; charset=us-ascii

This is explicitly typed plain US-ASCII text.
It DOES end with a linebreak.

--simple boundary--

This is the epilogue. It is also to be ignored.
`, "\n", "\r\n")
235
236func TestMime(t *testing.T) {
237 // from ../rfc/2046:1148
238 r := strings.NewReader(mimeMsg)
239 p, err := Parse(pkglog.Logger, true, r)
240 tcheck(t, err, "new reader")
241 if len(p.bound) == 0 {
242 t.Fatalf("got no bound, expected bound for mime message")
243 }
244
245 pp, err := p.ParseNextPart(pkglog.Logger)
246 tcheck(t, err, "next part")
247 buf, err := io.ReadAll(pp.Reader())
248 tcheck(t, err, "read all")
249 tcompare(t, string(buf), "This is implicitly typed plain US-ASCII text.\r\nIt does NOT end with a linebreak.")
250
251 pp, err = p.ParseNextPart(pkglog.Logger)
252 tcheck(t, err, "next part")
253 buf, err = io.ReadAll(pp.Reader())
254 tcheck(t, err, "read all")
255 tcompare(t, string(buf), "This is explicitly typed plain US-ASCII text.\r\nIt DOES end with a linebreak.\r\n")
256
257 _, err = p.ParseNextPart(pkglog.Logger)
258 tcompare(t, err, io.EOF)
259
260 if len(p.Parts) != 2 {
261 t.Fatalf("got %d parts, expected 2", len(p.Parts))
262 }
263 if p.Parts[0].RawLineCount != 2 {
264 t.Fatalf("got %d lines for first part, expected 2", p.Parts[0].RawLineCount)
265 }
266 if p.Parts[1].RawLineCount != 2 {
267 t.Fatalf("got %d lines for second part, expected 2", p.Parts[1].RawLineCount)
268 }
269}
270
271func TestLongLine(t *testing.T) {
272 line := make([]byte, maxLineLength+1)
273 for i := range line {
274 line[i] = 'a'
275 }
276 _, err := Parse(pkglog.Logger, true, bytes.NewReader(line))
277 tfail(t, err, errLineTooLong)
278}
279
280func TestBareCrLf(t *testing.T) {
281 parse := func(strict bool, s string) error {
282 p, err := Parse(pkglog.Logger, strict, strings.NewReader(s))
283 if err != nil {
284 return err
285 }
286 return p.Walk(pkglog.Logger, nil)
287 }
288 err := parse(false, "subject: test\ntest\r\n")
289 tfail(t, err, errBareLF)
290 err = parse(false, "\r\ntest\ntest\r\n")
291 tfail(t, err, errBareLF)
292
293 Pedantic = true
294 err = parse(false, "subject: test\rtest\r\n")
295 tfail(t, err, errBareCR)
296 err = parse(false, "\r\ntest\rtest\r\n")
297 tfail(t, err, errBareCR)
298 Pedantic = false
299
300 err = parse(true, "subject: test\rtest\r\n")
301 tfail(t, err, errBareCR)
302 err = parse(true, "\r\ntest\rtest\r\n")
303 tfail(t, err, errBareCR)
304
305 err = parse(false, "subject: test\rtest\r\n")
306 tcheck(t, err, "header with bare cr")
307 err = parse(false, "\r\ntest\rtest\r\n")
308 tcheck(t, err, "body with bare cr")
309}
310
311func TestMissingClosingBoundary(t *testing.T) {
312 message := strings.ReplaceAll(`Content-Type: multipart/mixed; boundary=x
313
314--x
315
316test
317`, "\n", "\r\n")
318 msg, err := Parse(pkglog.Logger, false, strings.NewReader(message))
319 tcheck(t, err, "new reader")
320 err = walkmsg(&msg)
321 tfail(t, err, errMissingClosingBoundary)
322
323 msg, _ = Parse(pkglog.Logger, false, strings.NewReader(message))
324 err = msg.Walk(pkglog.Logger, nil)
325 tfail(t, err, errMissingClosingBoundary)
326}
327
328func TestHeaderEOF(t *testing.T) {
329 message := "header: test"
330 _, err := Parse(pkglog.Logger, false, strings.NewReader(message))
331 tfail(t, err, errUnexpectedEOF)
332}
333
334func TestBodyEOF(t *testing.T) {
335 message := "header: test\r\n\r\ntest"
336 msg, err := Parse(pkglog.Logger, true, strings.NewReader(message))
337 tcheck(t, err, "new reader")
338 buf, err := io.ReadAll(msg.Reader())
339 tcheck(t, err, "read body")
340 tcompare(t, string(buf), "test")
341}
342
343func TestWalk(t *testing.T) {
344 var message = strings.ReplaceAll(`Content-Type: multipart/related; boundary="----=_NextPart_afb3ad6f146b12b709deac3e387a3ad7"
345
346------=_NextPart_afb3ad6f146b12b709deac3e387a3ad7
347Content-Type: multipart/alternative; boundary="----=_NextPart_afb3ad6f146b12b709deac3e387a3ad7_alt"
348
349------=_NextPart_afb3ad6f146b12b709deac3e387a3ad7_alt
350Content-Type: text/plain; charset="utf-8"
351Content-Transfer-Encoding: 8bit
352
353test
354
355
356------=_NextPart_afb3ad6f146b12b709deac3e387a3ad7_alt
357Content-Type: text/html; charset="utf-8"
358Content-Transfer-Encoding: 8bit
359
360test
361
362------=_NextPart_afb3ad6f146b12b709deac3e387a3ad7_alt--
363------=_NextPart_afb3ad6f146b12b709deac3e387a3ad7--
364
365`, "\n", "\r\n")
366
367 msg, err := Parse(pkglog.Logger, false, strings.NewReader(message))
368 tcheck(t, err, "new reader")
369 enforceSequential = true
370 defer func() {
371 enforceSequential = false
372 }()
373 err = walkmsg(&msg)
374 tcheck(t, err, "walkmsg")
375
376 msg, _ = Parse(pkglog.Logger, false, strings.NewReader(message))
377 err = msg.Walk(pkglog.Logger, nil)
378 tcheck(t, err, "msg.Walk")
379}
380
381func TestNested(t *testing.T) {
382 // From ../rfc/2049:801
383 nestedMessage := strings.ReplaceAll(`MIME-Version: 1.0
384From: Nathaniel Borenstein <nsb@nsb.fv.com>
385To: Ned Freed <ned@innosoft.com>
386Date: Fri, 07 Oct 1994 16:15:05 -0700 (PDT)
387Subject: A multipart example
388Content-Type: multipart/mixed;
389 boundary=unique-boundary-1
390
391This is the preamble area of a multipart message.
392Mail readers that understand multipart format
393should ignore this preamble.
394
395If you are reading this text, you might want to
396consider changing to a mail reader that understands
397how to properly display multipart messages.
398
399--unique-boundary-1
400
401 ... Some text appears here ...
402
403[Note that the blank between the boundary and the start
404 of the text in this part means no header fields were
405 given and this is text in the US-ASCII character set.
406 It could have been done with explicit typing as in the
407 next part.]
408
409--unique-boundary-1
410Content-type: text/plain; charset=US-ASCII
411
412This could have been part of the previous part, but
413illustrates explicit versus implicit typing of body
414parts.
415
416--unique-boundary-1
417Content-Type: multipart/parallel; boundary=unique-boundary-2
418
419--unique-boundary-2
420Content-Type: audio/basic
421Content-Transfer-Encoding: base64
422
423
424--unique-boundary-2
425Content-Type: image/jpeg
426Content-Transfer-Encoding: base64
427
428
429--unique-boundary-2--
430
431--unique-boundary-1
432Content-type: text/enriched
433
434This is <bold><italic>enriched.</italic></bold>
435<smaller>as defined in RFC 1896</smaller>
436
437Isn't it
438<bigger><bigger>cool?</bigger></bigger>
439
440--unique-boundary-1
441Content-Type: message/rfc822
442
443From: (mailbox in US-ASCII)
444To: (address in US-ASCII)
445Subject: (subject in US-ASCII)
446Content-Type: Text/plain; charset=ISO-8859-1
447Content-Transfer-Encoding: Quoted-printable
448
449 ... Additional text in ISO-8859-1 goes here ...
450
451--unique-boundary-1--
452`, "\n", "\r\n")
453
454 msg, err := Parse(pkglog.Logger, true, strings.NewReader(nestedMessage))
455 tcheck(t, err, "new reader")
456 enforceSequential = true
457 defer func() {
458 enforceSequential = false
459 }()
460 err = walkmsg(&msg)
461 tcheck(t, err, "walkmsg")
462
463 if len(msg.Parts) != 5 {
464 t.Fatalf("got %d parts, expected 5", len(msg.Parts))
465 }
466 sub := msg.Parts[4].Message
467 if sub == nil {
468 t.Fatalf("missing part.Message")
469 }
470 buf, err := io.ReadAll(sub.Reader())
471 if err != nil {
472 t.Fatalf("read message body: %v", err)
473 }
474 exp := " ... Additional text in ISO-8859-1 goes here ...\r\n"
475 if string(buf) != exp {
476 t.Fatalf("got %q, expected %q", buf, exp)
477 }
478
479 msg, _ = Parse(pkglog.Logger, false, strings.NewReader(nestedMessage))
480 err = msg.Walk(pkglog.Logger, nil)
481 tcheck(t, err, "msg.Walk")
482
483}
484
485func TestWalkdir(t *testing.T) {
486 // Ensure these dirs exist. Developers should bring their own ham/spam example
487 // emails.
488 os.MkdirAll("../testdata/train/ham", 0770)
489 os.MkdirAll("../testdata/train/spam", 0770)
490
491 var n, nfail int
492 twalkdir(t, "../testdata/train/ham", &n, &nfail)
493 twalkdir(t, "../testdata/train/spam", &n, &nfail)
494 log.Printf("parsing messages: %d/%d failed", nfail, n)
495}
496
497func twalkdir(t *testing.T, dir string, n, nfail *int) {
498 names, err := os.ReadDir(dir)
499 tcheck(t, err, "readdir")
500 if len(names) > 1000 {
501 names = names[:1000]
502 }
503 for _, name := range names {
504 p := filepath.Join(dir, name.Name())
505 *n++
506 err := walk(p)
507 if err != nil {
508 *nfail++
509 log.Printf("%s: %v", p, err)
510 }
511 }
512}
513
514func walk(path string) error {
515 r, err := os.Open(path)
516 if err != nil {
517 return err
518 }
519 defer r.Close()
520 msg, err := Parse(pkglog.Logger, false, r)
521 if err != nil {
522 return err
523 }
524 return walkmsg(&msg)
525}
526
527func walkmsg(msg *Part) error {
528 enforceSequential = true
529 defer func() {
530 enforceSequential = false
531 }()
532
533 if len(msg.bound) == 0 {
534 buf, err := io.ReadAll(msg.Reader())
535 if err != nil {
536 return err
537 }
538
539 if msg.MediaType == "MESSAGE" && (msg.MediaSubType == "RFC822" || msg.MediaSubType == "GLOBAL") {
540 mp, err := Parse(pkglog.Logger, false, bytes.NewReader(buf))
541 if err != nil {
542 return err
543 }
544 msg.Message = &mp
545 walkmsg(msg.Message)
546 }
547
548 size := msg.EndOffset - msg.BodyOffset
549 if size < 0 {
550 log.Printf("msg %v", msg)
551 panic("inconsistent body/end offset")
552 }
553 sr := io.NewSectionReader(msg.r, msg.BodyOffset, size)
554 decsr := msg.bodyReader(sr)
555 buf2, err := io.ReadAll(decsr)
556 if err != nil {
557 return err
558 }
559
560 if !bytes.Equal(buf, buf2) {
561 panic("data mismatch reading sequentially vs via offsets")
562 }
563
564 return nil
565 }
566
567 for {
568 pp, err := msg.ParseNextPart(pkglog.Logger)
569 if err == io.EOF {
570 return nil
571 }
572 if err != nil {
573 return err
574 }
575 if err := walkmsg(pp); err != nil {
576 return err
577 }
578 enforceSequential = true
579 }
580}
581
582func TestEmbedded(t *testing.T) {
583 f, err := os.Open("../testdata/message/message-rfc822-multipart.eml")
584 tcheck(t, err, "open")
585 fi, err := f.Stat()
586 tcheck(t, err, "stat")
587 _, err = EnsurePart(pkglog.Logger, false, f, fi.Size())
588 tcheck(t, err, "parse")
589}
590
591func TestEmbedded2(t *testing.T) {
592 buf, err := os.ReadFile("../testdata/message/message-rfc822-multipart2.eml")
593 tcheck(t, err, "readfile")
594 buf = bytes.ReplaceAll(buf, []byte("\n"), []byte("\r\n"))
595
596 _, err = EnsurePart(pkglog.Logger, false, bytes.NewReader(buf), int64(len(buf)))
597 tfail(t, err, nil)
598}
599