14 "github.com/mjl-/mox/mlog"
// pkglog is the package-level logger for these tests, created with the name
// "message". The tests pass pkglog.Logger to the parsing calls.
17var pkglog = mlog.New("message", nil)
// tcheck aborts the test through t.Fatalf with the message "msg: err".
// NOTE(review): the condition guarding the Fatalf call is not visible in this
// view; presumably it only fires when err is non-nil -- confirm.
19func tcheck(t *testing.T, err error, msg string) {
22 t.Fatalf("%s: %s", msg, err)
// tcompare aborts the test, printing both values, when got and exp are not
// equal according to reflect.DeepEqual.
26func tcompare(t *testing.T, got, exp any) {
28 if !reflect.DeepEqual(got, exp) {
29 t.Fatalf("got %v, expected %v", got, exp)
// tfail checks err against the expected error expErr. The test is aborted when
// exactly one of the two is nil, or when expErr is non-nil and
// errors.Is(err, expErr) reports false.
33func tfail(t *testing.T, err, expErr error) {
35 if (err == nil) != (expErr == nil) || expErr != nil && !errors.Is(err, expErr) {
36 t.Fatalf("got err %v, expected %v", err, expErr)
// TestEmptyHeader parses the input s with EnsurePart (second argument true),
// reads the body of the returned part and compares it to expBody, and expects
// both MediaType and MediaSubType to be empty strings.
// NOTE(review): s and expBody are defined on lines not visible in this view.
40func TestEmptyHeader(t *testing.T) {
42 p, err := EnsurePart(pkglog.Logger, true, strings.NewReader(s), int64(len(s)))
43 tcheck(t, err, "parse empty headers")
44 buf, err := io.ReadAll(p.Reader())
45 tcheck(t, err, "read")
47 tcompare(t, string(buf), expBody)
48 tcompare(t, p.MediaType, "")
49 tcompare(t, p.MediaSubType, "")
// TestBadContentType checks how EnsurePart handles malformed content-type
// headers: in which cases ErrBadContentType is returned, and which
// MediaType/MediaSubType the returned part ends up with. In every case the
// body of the returned part must still be readable and equal to expBody.
// NOTE(review): expBody is defined on a line not visible in this view.
52func TestBadContentType(t *testing.T) {
55 // Pedantic is like strict.
	// NOTE(review): the call below passes false yet expects ErrBadContentType;
	// presumably pedantic mode is switched on by a line not visible here --
	// confirm.
57 s := "content-type: text/html;;\r\n\r\ntest"
58 p, err := EnsurePart(pkglog.Logger, false, strings.NewReader(s), int64(len(s)))
59 tfail(t, err, ErrBadContentType)
60 buf, err := io.ReadAll(p.Reader())
61 tcheck(t, err, "read")
62 tcompare(t, string(buf), expBody)
63 tcompare(t, p.MediaType, "APPLICATION")
64 tcompare(t, p.MediaSubType, "OCTET-STREAM")
	// Second argument true: the same header is rejected with ErrBadContentType
	// and the returned part is reported as APPLICATION/OCTET-STREAM.
68 s = "content-type: text/html;;\r\n\r\ntest"
69 p, err = EnsurePart(pkglog.Logger, true, strings.NewReader(s), int64(len(s)))
70 tfail(t, err, ErrBadContentType)
71 buf, err = io.ReadAll(p.Reader())
72 tcheck(t, err, "read")
73 tcompare(t, string(buf), expBody)
74 tcompare(t, p.MediaType, "APPLICATION")
75 tcompare(t, p.MediaSubType, "OCTET-STREAM")
77 // Non-strict but unrecoverable content-type.
	// No error is expected, but the part is reported as
	// APPLICATION/OCTET-STREAM.
78 s = "content-type: not a content type;;\r\n\r\ntest"
79 p, err = EnsurePart(pkglog.Logger, false, strings.NewReader(s), int64(len(s)))
80 tcheck(t, err, "parsing message with bad but recoverable content-type")
81 buf, err = io.ReadAll(p.Reader())
82 tcheck(t, err, "read")
83 tcompare(t, string(buf), expBody)
84 tcompare(t, p.MediaType, "APPLICATION")
85 tcompare(t, p.MediaSubType, "OCTET-STREAM")
87 // We try to use only the content-type, typically better than application/octet-stream.
	// Here the header with the doubled semicolon yields no error and the part
	// is reported as TEXT/HTML.
88 s = "content-type: text/html;;\r\n\r\ntest"
89 p, err = EnsurePart(pkglog.Logger, false, strings.NewReader(s), int64(len(s)))
90 tcheck(t, err, "parsing message with bad but recoverable content-type")
91 buf, err = io.ReadAll(p.Reader())
92 tcheck(t, err, "read")
93 tcompare(t, string(buf), expBody)
94 tcompare(t, p.MediaType, "TEXT")
95 tcompare(t, p.MediaSubType, "HTML")
97 // Not recovering multipart, we won't have a boundary.
	// No error is expected, and the part is reported as
	// APPLICATION/OCTET-STREAM rather than as a multipart.
98 s = "content-type: multipart/mixed;;\r\n\r\ntest"
99 p, err = EnsurePart(pkglog.Logger, false, strings.NewReader(s), int64(len(s)))
100 tcheck(t, err, "parsing message with bad but recoverable content-type")
101 buf, err = io.ReadAll(p.Reader())
102 tcheck(t, err, "read")
103 tcompare(t, string(buf), expBody)
104 tcompare(t, p.MediaType, "APPLICATION")
105 tcompare(t, p.MediaSubType, "OCTET-STREAM")
// TestBareCR parses a text/html message whose body contains a bare carriage
// return and checks three outcomes: errBareCR from both EnsurePart and the
// body read; errBareCR from EnsurePart with a fallback part that can still be
// read; and a clean non-strict parse whose body equals expBody.
108func TestBareCR(t *testing.T) {
109 s := "content-type: text/html\r\n\r\nbare\rcr\r\n"
110 expBody := "bare\rcr\r\n"
112 // Pedantic is like strict.
	// NOTE(review): this call passes false, the same as the non-strict case at
	// the end of the test, yet expects errBareCR; presumably pedantic mode is
	// enabled on a line not visible here -- confirm.
114 p, err := EnsurePart(pkglog.Logger, false, strings.NewReader(s), int64(len(s)))
115 tfail(t, err, errBareCR)
116 _, err = io.ReadAll(p.Reader())
117 tfail(t, err, errBareCR)
	// Second argument true: EnsurePart reports errBareCR, but reading the part
	// it returned must succeed.
121 p, err = EnsurePart(pkglog.Logger, true, strings.NewReader(s), int64(len(s)))
122 tfail(t, err, errBareCR)
123 _, err = io.ReadAll(p.Reader())
124 tcheck(t, err, "read fallback part without error")
126 // Non-strict allows bare cr.
127 p, err = EnsurePart(pkglog.Logger, false, strings.NewReader(s), int64(len(s)))
128 tcheck(t, err, "parse")
129 buf, err := io.ReadAll(p.Reader())
130 tcheck(t, err, "read")
131 tcompare(t, string(buf), expBody)
// basicMsg is the fixture for TestBasic: a message with a From header, content
// type text/plain and a base64 content-transfer-encoding.
// NOTE(review): the body and the ReplaceAll arguments are not visible in this
// view; presumably the replacement turns "\n" into "\r\n" -- confirm.
134var basicMsg = strings.ReplaceAll(`From: <mjl@mox.example>
135Content-Type: text/plain
136Content-Transfer-Encoding: base64
// TestBasic parses basicMsg in strict mode and checks that RawReader returns
// the body still base64-encoded ("aGkK\r\n"), that Reader returns it decoded
// ("hi\r\n"), that RawLineCount is 1, and that EndOffset-BodyOffset equals the
// length of the raw body.
141func TestBasic(t *testing.T) {
142 r := strings.NewReader(basicMsg)
143 p, err := Parse(pkglog.Logger, true, r)
144 tcheck(t, err, "new reader")
	// Raw (undecoded) body first.
146 buf, err := io.ReadAll(p.RawReader())
147 tcheck(t, err, "read raw")
148 expBody := "aGkK\r\n"
149 tcompare(t, string(buf), expBody)
	// Then the decoded body.
151 buf, err = io.ReadAll(p.Reader())
152 tcheck(t, err, "read decoded")
153 tcompare(t, string(buf), "hi\r\n")
155 if p.RawLineCount != 1 {
156 t.Fatalf("basic message, got %d lines, expected 1", p.RawLineCount)
158 if size := p.EndOffset - p.BodyOffset; size != int64(len(expBody)) {
159 t.Fatalf("basic message, got size %d, expected %d", size, len(expBody))
// basicMsg2 is the fixture for TestBasic2: a message with Date, From, Subject,
// To, Message-Id and a TEXT/PLAIN US-ASCII content type, followed by a short
// text body.
// NOTE(review): some lines of the literal and the ReplaceAll arguments are not
// visible in this view.
164var basicMsg2 = strings.ReplaceAll(`Date: Mon, 7 Feb 1994 21:52:25 -0800 (PST)
165From: Fred Foobar <foobar@Blurdybloop.example>
166Subject: afternoon meeting
167To: mooch@owatagu.siam.edu.example
168Message-Id: <B27397-0100000@Blurdybloop.example>
170Content-Type: TEXT/PLAIN; CHARSET=US-ASCII
172Hello Joe, do you think we can meet at 3:30 tomorrow?
// TestBasic2 parses basicMsg2 in strict mode and checks that RawReader and
// Reader both return the same body, that RawLineCount is 2 and that
// EndOffset-BodyOffset equals the body length. It then parses the message
// again, calls Walk instead of reading, and repeats the line-count and size
// checks.
176func TestBasic2(t *testing.T) {
177 r := strings.NewReader(basicMsg2)
178 p, err := Parse(pkglog.Logger, true, r)
179 tcheck(t, err, "new reader")
181 buf, err := io.ReadAll(p.RawReader())
182 tcheck(t, err, "read raw")
183 expBody := "Hello Joe, do you think we can meet at 3:30 tomorrow?\r\n\r\n"
184 tcompare(t, string(buf), expBody)
	// The decoded body is expected to be identical to the raw body.
186 buf, err = io.ReadAll(p.Reader())
187 tcheck(t, err, "read decoded")
188 tcompare(t, string(buf), expBody)
190 if p.RawLineCount != 2 {
191 t.Fatalf("basic message, got %d lines, expected 2", p.RawLineCount)
193 if size := p.EndOffset - p.BodyOffset; size != int64(len(expBody)) {
194 t.Fatalf("basic message, got size %d, expected %d", size, len(expBody))
	// Same checks after a fresh parse followed by Walk.
197 r = strings.NewReader(basicMsg2)
198 p, err = Parse(pkglog.Logger, true, r)
199 tcheck(t, err, "new reader")
200 err = p.Walk(pkglog.Logger, nil)
201 tcheck(t, err, "walk")
202 if p.RawLineCount != 2 {
203 t.Fatalf("basic message, got %d lines, expected 2", p.RawLineCount)
205 if size := p.EndOffset - p.BodyOffset; size != int64(len(expBody)) {
206 t.Fatalf("basic message, got size %d, expected %d", size, len(expBody))
// mimeMsg is the fixture for TestMime: a multipart/mixed message using the
// boundary "simple boundary", with a preamble, one part without an explicit
// content type, one part with an explicit text/plain content type, and an
// epilogue.
// NOTE(review): the boundary lines and the ReplaceAll arguments are not
// visible in this view.
210var mimeMsg = strings.ReplaceAll(`From: Nathaniel Borenstein <nsb@bellcore.com>
211To: Ned Freed <ned@innosoft.com>
212Date: Sun, 21 Mar 1993 23:56:48 -0800 (PST)
213Subject: Sample message
215Content-type: multipart/mixed; boundary="simple boundary"
217This is the preamble. It is to be ignored, though it
218is a handy place for composition agents to include an
219explanatory note to non-MIME conformant readers.
223This is implicitly typed plain US-ASCII text.
224It does NOT end with a linebreak.
226Content-type: text/plain; charset=us-ascii
228This is explicitly typed plain US-ASCII text.
229It DOES end with a linebreak.
233This is the epilogue. It is also to be ignored.
// TestMime parses mimeMsg in strict mode, requires that a boundary was
// detected, and steps through the parts with ParseNextPart: two parts with the
// expected bodies, then io.EOF. Afterwards it checks that p.Parts holds two
// parts, each with a RawLineCount of 2.
236func TestMime(t *testing.T) {
238 r := strings.NewReader(mimeMsg)
239 p, err := Parse(pkglog.Logger, true, r)
240 tcheck(t, err, "new reader")
241 if len(p.bound) == 0 {
242 t.Fatalf("got no bound, expected bound for mime message")
	// First part: body has no trailing line break.
245 pp, err := p.ParseNextPart(pkglog.Logger)
246 tcheck(t, err, "next part")
247 buf, err := io.ReadAll(pp.Reader())
248 tcheck(t, err, "read all")
249 tcompare(t, string(buf), "This is implicitly typed plain US-ASCII text.\r\nIt does NOT end with a linebreak.")
	// Second part: body ends with a line break.
251 pp, err = p.ParseNextPart(pkglog.Logger)
252 tcheck(t, err, "next part")
253 buf, err = io.ReadAll(pp.Reader())
254 tcheck(t, err, "read all")
255 tcompare(t, string(buf), "This is explicitly typed plain US-ASCII text.\r\nIt DOES end with a linebreak.\r\n")
	// No further parts.
257 _, err = p.ParseNextPart(pkglog.Logger)
258 tcompare(t, err, io.EOF)
260 if len(p.Parts) != 2 {
261 t.Fatalf("got %d parts, expected 2", len(p.Parts))
263 if p.Parts[0].RawLineCount != 2 {
264 t.Fatalf("got %d lines for first part, expected 2", p.Parts[0].RawLineCount)
266 if p.Parts[1].RawLineCount != 2 {
267 t.Fatalf("got %d lines for second part, expected 2", p.Parts[1].RawLineCount)
// TestLongLine builds an input of maxLineLength+1 bytes and expects a strict
// Parse of it to fail with errLineTooLong.
// NOTE(review): the loop body that fills the buffer is not visible in this
// view.
271func TestLongLine(t *testing.T) {
272 line := make([]byte, maxLineLength+1)
273 for i := range line {
276 _, err := Parse(pkglog.Logger, true, bytes.NewReader(line))
277 tfail(t, err, errLineTooLong)
// TestBareCrLf checks handling of bare LF and bare CR in both the header and
// the body. The parse helper parses the input with the given strict flag and
// then walks the result, so errors from either step are reported.
// NOTE(review): the non-strict bare-CR inputs appear twice with opposite
// expectations (errBareCR first, success last); presumably a pedantic flag is
// toggled on lines not visible in this view -- confirm.
280func TestBareCrLf(t *testing.T) {
281 parse := func(strict bool, s string) error {
282 p, err := Parse(pkglog.Logger, strict, strings.NewReader(s))
286 return p.Walk(pkglog.Logger, nil)
	// Bare LF is expected to fail with errBareLF even with strict set to false.
288 err := parse(false, "subject: test\ntest\r\n")
289 tfail(t, err, errBareLF)
290 err = parse(false, "\r\ntest\ntest\r\n")
291 tfail(t, err, errBareLF)
294 err = parse(false, "subject: test\rtest\r\n")
295 tfail(t, err, errBareCR)
296 err = parse(false, "\r\ntest\rtest\r\n")
297 tfail(t, err, errBareCR)
	// Strict parsing rejects bare CR in header and body.
300 err = parse(true, "subject: test\rtest\r\n")
301 tfail(t, err, errBareCR)
302 err = parse(true, "\r\ntest\rtest\r\n")
303 tfail(t, err, errBareCR)
	// Here bare CR is accepted in header and body.
305 err = parse(false, "subject: test\rtest\r\n")
306 tcheck(t, err, "header with bare cr")
307 err = parse(false, "\r\ntest\rtest\r\n")
308 tcheck(t, err, "body with bare cr")
// TestMissingClosingBoundary uses a multipart/mixed message with boundary x.
// The initial non-strict Parse must succeed; errMissingClosingBoundary is then
// expected twice: once from a step on a line not visible in this view
// (presumably a walkmsg call -- confirm), and once from Walk on a freshly
// parsed copy of the message.
311func TestMissingClosingBoundary(t *testing.T) {
312 message := strings.ReplaceAll(`Content-Type: multipart/mixed; boundary=x
318 msg, err := Parse(pkglog.Logger, false, strings.NewReader(message))
319 tcheck(t, err, "new reader")
321 tfail(t, err, errMissingClosingBoundary)
323 msg, _ = Parse(pkglog.Logger, false, strings.NewReader(message))
324 err = msg.Walk(pkglog.Logger, nil)
325 tfail(t, err, errMissingClosingBoundary)
// TestHeaderEOF expects a non-strict Parse to fail with errUnexpectedEOF when
// the input is a single header line with no line ending after it.
328func TestHeaderEOF(t *testing.T) {
329 message := "header: test"
330 _, err := Parse(pkglog.Logger, false, strings.NewReader(message))
331 tfail(t, err, errUnexpectedEOF)
// TestBodyEOF checks that a body ending without a final line break is
// accepted: a strict Parse succeeds and reading the body returns "test".
334func TestBodyEOF(t *testing.T) {
335 message := "header: test\r\n\r\ntest"
336 msg, err := Parse(pkglog.Logger, true, strings.NewReader(message))
337 tcheck(t, err, "new reader")
338 buf, err := io.ReadAll(msg.Reader())
339 tcheck(t, err, "read body")
340 tcompare(t, string(buf), "test")
// TestWalk uses a multipart/related message that contains a
// multipart/alternative part, which in turn holds a text/plain and a text/html
// part; the inner boundary is the outer boundary with "_alt" appended. The
// message is parsed in non-strict mode, enforceSequential is set to true and
// then back to false around a step whose error is checked as "walkmsg", and
// finally a freshly parsed copy is traversed with Walk, which must succeed.
// NOTE(review): the walkmsg call itself, the part bodies and the ReplaceAll
// arguments are on lines not visible in this view.
343func TestWalk(t *testing.T) {
344 var message = strings.ReplaceAll(`Content-Type: multipart/related; boundary="----=_NextPart_afb3ad6f146b12b709deac3e387a3ad7"
346------=_NextPart_afb3ad6f146b12b709deac3e387a3ad7
347Content-Type: multipart/alternative; boundary="----=_NextPart_afb3ad6f146b12b709deac3e387a3ad7_alt"
349------=_NextPart_afb3ad6f146b12b709deac3e387a3ad7_alt
350Content-Type: text/plain; charset="utf-8"
351Content-Transfer-Encoding: 8bit
356------=_NextPart_afb3ad6f146b12b709deac3e387a3ad7_alt
357Content-Type: text/html; charset="utf-8"
358Content-Transfer-Encoding: 8bit
362------=_NextPart_afb3ad6f146b12b709deac3e387a3ad7_alt--
363------=_NextPart_afb3ad6f146b12b709deac3e387a3ad7--
367 msg, err := Parse(pkglog.Logger, false, strings.NewReader(message))
368 tcheck(t, err, "new reader")
369 enforceSequential = true
371 enforceSequential = false
374 tcheck(t, err, "walkmsg")
376 msg, _ = Parse(pkglog.Logger, false, strings.NewReader(message))
377 err = msg.Walk(pkglog.Logger, nil)
378 tcheck(t, err, "msg.Walk")
// TestNested uses a multipart/mixed message (boundary unique-boundary-1) whose
// parts include a nested multipart/parallel (boundary unique-boundary-2), a
// text/enriched part and a message/rfc822 part. After a strict Parse and a
// step whose error is checked as "walkmsg", the test expects five top-level
// parts, requires that the fifth has an embedded Message, and requires that
// the body of that embedded message is the "Additional text in ISO-8859-1"
// line. Finally a non-strict parse of the same input is traversed with Walk,
// which must succeed.
// NOTE(review): the boundary lines, the walkmsg call and several guard
// conditions are on lines not visible in this view.
381func TestNested(t *testing.T) {
383 nestedMessage := strings.ReplaceAll(`MIME-Version: 1.0
384From: Nathaniel Borenstein <nsb@nsb.fv.com>
385To: Ned Freed <ned@innosoft.com>
386Date: Fri, 07 Oct 1994 16:15:05 -0700 (PDT)
387Subject: A multipart example
388Content-Type: multipart/mixed;
389 boundary=unique-boundary-1
391This is the preamble area of a multipart message.
392Mail readers that understand multipart format
393should ignore this preamble.
395If you are reading this text, you might want to
396consider changing to a mail reader that understands
397how to properly display multipart messages.
401 ... Some text appears here ...
403[Note that the blank between the boundary and the start
404 of the text in this part means no header fields were
405 given and this is text in the US-ASCII character set.
406 It could have been done with explicit typing as in the
410Content-type: text/plain; charset=US-ASCII
412This could have been part of the previous part, but
413illustrates explicit versus implicit typing of body
417Content-Type: multipart/parallel; boundary=unique-boundary-2
420Content-Type: audio/basic
421Content-Transfer-Encoding: base64
425Content-Type: image/jpeg
426Content-Transfer-Encoding: base64
432Content-type: text/enriched
434This is <bold><italic>enriched.</italic></bold>
435<smaller>as defined in RFC 1896</smaller>
438<bigger><bigger>cool?</bigger></bigger>
441Content-Type: message/rfc822
443From: (mailbox in US-ASCII)
444To: (address in US-ASCII)
445Subject: (subject in US-ASCII)
446Content-Type: Text/plain; charset=ISO-8859-1
447Content-Transfer-Encoding: Quoted-printable
449 ... Additional text in ISO-8859-1 goes here ...
454 msg, err := Parse(pkglog.Logger, true, strings.NewReader(nestedMessage))
455 tcheck(t, err, "new reader")
456 enforceSequential = true
458 enforceSequential = false
461 tcheck(t, err, "walkmsg")
463 if len(msg.Parts) != 5 {
464 t.Fatalf("got %d parts, expected 5", len(msg.Parts))
466 sub := msg.Parts[4].Message
468 t.Fatalf("missing part.Message")
470 buf, err := io.ReadAll(sub.Reader())
472 t.Fatalf("read message body: %v", err)
474 exp := " ... Additional text in ISO-8859-1 goes here ...\r\n"
475 if string(buf) != exp {
476 t.Fatalf("got %q, expected %q", buf, exp)
479 msg, _ = Parse(pkglog.Logger, false, strings.NewReader(nestedMessage))
480 err = msg.Walk(pkglog.Logger, nil)
481 tcheck(t, err, "msg.Walk")
// TestWalkdir runs twalkdir over the ham and spam training directories under
// ../testdata/train and logs how many of the processed messages failed. Both
// directories are created first if missing; the MkdirAll errors are ignored.
// NOTE(review): the declarations of n and nfail are on a line not visible in
// this view.
485func TestWalkdir(t *testing.T) {
486 // Ensure these dirs exist. Developers should bring their own ham/spam example
488 os.MkdirAll("../testdata/train/ham", 0770)
489 os.MkdirAll("../testdata/train/spam", 0770)
492 twalkdir(t, "../testdata/train/ham", &n, &nfail)
493 twalkdir(t, "../testdata/train/spam", &n, &nfail)
494 log.Printf("parsing messages: %d/%d failed", nfail, n)
// twalkdir reads the entries of dir (aborting the test if that fails), builds
// the full path of each entry, and logs the path together with the error for
// entries that fail.
// NOTE(review): what happens when there are more than 1000 entries, the
// per-file call (presumably walk) and the updates to the n and nfail counters
// are on lines not visible in this view.
497func twalkdir(t *testing.T, dir string, n, nfail *int) {
498 names, err := os.ReadDir(dir)
499 tcheck(t, err, "readdir")
500 if len(names) > 1000 {
503 for _, name := range names {
504 p := filepath.Join(dir, name.Name())
509 log.Printf("%s: %v", p, err)
// walk opens the file at path and parses it as a message in non-strict mode.
// NOTE(review): error handling, closing of the file and whatever is done with
// the parsed message are on lines not visible in this view.
514func walk(path string) error {
515 r, err := os.Open(path)
520 msg, err := Parse(pkglog.Logger, false, r)
// walkmsg exercises a parsed part and cross-checks two ways of reading it.
// For a part without a boundary it reads the whole body through Reader,
// parses that body as a nested message when the part is MESSAGE/RFC822 or
// MESSAGE/GLOBAL, then reads the body again through a section reader covering
// BodyOffset up to EndOffset and panics if the two results differ. For other
// parts it calls ParseNextPart and recurses into the returned part.
// NOTE(review): the loop structure, the error returns, the condition guarding
// the offset panic and the statements between the enforceSequential
// assignments are on lines not visible in this view.
527func walkmsg(msg *Part) error {
528 enforceSequential = true
530 enforceSequential = false
	// No boundary: treat the part as a leaf and read its body sequentially.
533 if len(msg.bound) == 0 {
534 buf, err := io.ReadAll(msg.Reader())
539 if msg.MediaType == "MESSAGE" && (msg.MediaSubType == "RFC822" || msg.MediaSubType == "GLOBAL") {
540 mp, err := Parse(pkglog.Logger, false, bytes.NewReader(buf))
	// Read the same body again by offset, through the part's body reader, and
	// compare with the sequential read.
548 size := msg.EndOffset - msg.BodyOffset
550 log.Printf("msg %v", msg)
551 panic("inconsistent body/end offset")
553 sr := io.NewSectionReader(msg.r, msg.BodyOffset, size)
554 decsr := msg.bodyReader(sr)
555 buf2, err := io.ReadAll(decsr)
560 if !bytes.Equal(buf, buf2) {
561 panic("data mismatch reading sequentially vs via offsets")
	// Part with a boundary: fetch the next sub-part and walk it recursively.
568 pp, err := msg.ParseNextPart(pkglog.Logger)
575 if err := walkmsg(pp); err != nil {
578 enforceSequential = true
// TestEmbedded opens ../testdata/message/message-rfc822-multipart.eml and
// expects a non-strict EnsurePart over the whole file (size taken from
// fi.Size()) to succeed.
// NOTE(review): the Stat call that defines fi and any closing of the file are
// on lines not visible in this view.
582func TestEmbedded(t *testing.T) {
583 f, err := os.Open("../testdata/message/message-rfc822-multipart.eml")
584 tcheck(t, err, "open")
586 tcheck(t, err, "stat")
587 _, err = EnsurePart(pkglog.Logger, false, f, fi.Size())
588 tcheck(t, err, "parse")
// TestEmbedded2 reads ../testdata/message/message-rfc822-multipart2.eml,
// replaces every "\n" with "\r\n", and passes the result to a non-strict
// EnsurePart.
// NOTE(review): the check applied to the EnsurePart error is on a line not
// visible in this view.
591func TestEmbedded2(t *testing.T) {
592 buf, err := os.ReadFile("../testdata/message/message-rfc822-multipart2.eml")
593 tcheck(t, err, "readfile")
594 buf = bytes.ReplaceAll(buf, []byte("\n"), []byte("\r\n"))
596 _, err = EnsurePart(pkglog.Logger, false, bytes.NewReader(buf), int64(len(buf)))
// TestNetMailAddress parses a message whose From header has a localpart made
// of a single quoted space and no display name. It expects Envelope.From to
// hold one address with an empty name, the localpart still in its quoted form,
// and the domain example.com.
600func TestNetMailAddress(t *testing.T) {
601 const s = "From: \" \"@example.com\r\n\r\nbody\r\n"
602 p, err := EnsurePart(pkglog.Logger, false, strings.NewReader(s), int64(len(s)))
603 tcheck(t, err, "parse")
604 tcompare(t, p.Envelope.From, []Address{{"", `" "`, "example.com"}})
// TestParseQuotedCharset parses a From header whose display name is a
// Q-encoded word in the iso-8859-2 charset. It expects Envelope.From to hold
// the decoded name "Kristýna" with localpart k and domain example.com.
607func TestParseQuotedCharset(t *testing.T) {
608 const s = "From: =?iso-8859-2?Q?Krist=FDna?= <k@example.com>\r\n\r\nbody\r\n"
609 p, err := EnsurePart(pkglog.Logger, false, strings.NewReader(s), int64(len(s)))
610 tcheck(t, err, "parse")
611 tcompare(t, p.Envelope.From, []Address{{"Kristýna", "k", "example.com"}})