1package webmail
2
3import (
4 "bufio"
5 "errors"
6 "fmt"
7 "io"
8 "log/slog"
9 "mime"
10 "net/textproto"
11 "net/url"
12 "strings"
13
14 "golang.org/x/text/encoding/ianaindex"
15
16 "github.com/mjl-/mox/dns"
17 "github.com/mjl-/mox/message"
18 "github.com/mjl-/mox/mlog"
19 "github.com/mjl-/mox/mox-"
20 "github.com/mjl-/mox/moxio"
21 "github.com/mjl-/mox/smtp"
22 "github.com/mjl-/mox/store"
23)
24
25// todo: we should have all needed information for messageItem in store.Message (perhaps some data in message.Part) for fast access, not having to parse the on-disk message file.
26
27var wordDecoder = mime.WordDecoder{
28 CharsetReader: func(charset string, r io.Reader) (io.Reader, error) {
29 switch strings.ToLower(charset) {
30 case "", "us-ascii", "utf-8":
31 return r, nil
32 }
33 enc, _ := ianaindex.MIME.Encoding(charset)
34 if enc == nil {
35 enc, _ = ianaindex.IANA.Encoding(charset)
36 }
37 if enc == nil {
38 return r, fmt.Errorf("unknown charset %q", charset)
39 }
40 return enc.NewDecoder().Reader(r), nil
41 },
42}
43
44// Attempt q/b-word-decode name, coming from Content-Type "name" field or
45// Content-Disposition "filename" field.
46//
47// RFC 2231 specify an encoding for non-ascii values in mime header parameters. But
48// it appears common practice to instead just q/b-word encode the values.
49// Thunderbird and gmail.com do this for the Content-Type "name" parameter.
50// gmail.com also does that for the Content-Disposition "filename" parameter, where
51// Thunderbird uses the RFC 2231-defined encoding. Go's mime.ParseMediaType parses
52// the mechanism specified in RFC 2231 only. The value for "name" we get here would
53// already be decoded properly for standards-compliant headers, like
54// "filename*0*=UTF-8”%...; filename*1*=%.... We'll look for Q/B-word encoding
55// markers ("=?"-prefix or "?="-suffix) and try to decode if present. This would
56// only cause trouble for filenames having this prefix/suffix.
57func tryDecodeParam(log mlog.Log, name string) string {
58 if name == "" || !strings.HasPrefix(name, "=?") && !strings.HasSuffix(name, "?=") {
59 return name
60 }
61 // todo: find where this is allowed. it seems quite common. perhaps we should remove the pedantic check?
62 if mox.Pedantic {
63 log.Debug("attachment contains rfc2047 q/b-word-encoded mime parameter instead of rfc2231-encoded", slog.String("name", name))
64 return name
65 }
66 s, err := wordDecoder.DecodeHeader(name)
67 if err != nil {
68 log.Debugx("q/b-word decoding mime parameter", err, slog.String("name", name))
69 return name
70 }
71 return s
72}
73
74// todo: mime.FormatMediaType does not wrap long lines. should do it ourselves, and split header into several parts (if commonly supported).
75
76func messageItemMoreHeaders(moreHeaders []string, pm ParsedMessage) (l [][2]string) {
77 for _, k := range moreHeaders {
78 k = textproto.CanonicalMIMEHeaderKey(k)
79 for _, v := range pm.Headers[k] {
80 l = append(l, [2]string{k, v})
81 }
82 }
83 return l
84}
85
86func messageItem(log mlog.Log, m store.Message, state *msgState, moreHeaders []string) (MessageItem, error) {
87 headers := len(moreHeaders) > 0
88 pm, err := parsedMessage(log, m, state, false, true, headers)
89 if err != nil && errors.Is(err, message.ErrHeader) && headers {
90 log.Debugx("load message item without parsing headers after error", err, slog.Int64("msgid", m.ID))
91 pm, err = parsedMessage(log, m, state, false, true, false)
92 }
93 if err != nil {
94 return MessageItem{}, fmt.Errorf("parsing message %d for item: %v", m.ID, err)
95 }
96 // Clear largish unused data.
97 m.MsgPrefix = nil
98 m.ParsedBuf = nil
99 l := messageItemMoreHeaders(moreHeaders, pm)
100 return MessageItem{m, pm.envelope, pm.attachments, pm.isSigned, pm.isEncrypted, pm.firstLine, true, l}, nil
101}
102
// formatFirstLine returns a line the client can display next to the subject line
// in a mailbox. It will replace quoted text, and any prefixing "On ... wrote:"
// line with "[...]" so only new and useful information will be displayed.
// Trailing signatures are not included.
//
// Lines in the result are separated and terminated by "\n". If more than 250
// bytes were gathered, the result is cut to at most 230 bytes, on a UTF-8
// character boundary, and "..." is appended.
//
// The returned error is the error of the line scanner reading from r, if any.
// The text gathered so far is returned along with it.
func formatFirstLine(r io.Reader) (string, error) {
	// We look quite a bit of lines ahead for trailing signatures with trailing empty lines.
	var lines []string
	scanner := bufio.NewScanner(r)
	// ensureLines tops up the lookahead window to 10 whitespace-trimmed lines, or
	// fewer if the input is exhausted.
	ensureLines := func() {
		for len(lines) < 10 && scanner.Scan() {
			lines = append(lines, strings.TrimSpace(scanner.Text()))
		}
	}
	ensureLines()

	// isSnipped returns whether s is a marker authors use for text they removed.
	isSnipped := func(s string) bool {
		return s == "[...]" || s == "[…]" || s == "..."
	}

	// nextLineQuoted returns whether the line after index i, optionally after one
	// empty line, is quoted or a snip marker.
	nextLineQuoted := func(i int) bool {
		if i+1 < len(lines) && lines[i+1] == "" {
			i++
		}
		return i+1 < len(lines) && (strings.HasPrefix(lines[i+1], ">") || isSnipped(lines[i+1]))
	}

	// Remainder is signature if we see a line with only and minimum 2 dashes, and
	// there are no more empty lines (other than trailing ones), and there are fewer
	// than 5 lines left after the dashes.
	isSignature := func() bool {
		if len(lines) == 0 || !strings.HasPrefix(lines[0], "--") || strings.Trim(strings.TrimSpace(lines[0]), "-") != "" {
			return false
		}
		l := lines[1:]
		for len(l) > 0 && l[len(l)-1] == "" {
			l = l[:len(l)-1]
		}
		if len(l) >= 5 {
			return false
		}
		for _, line := range l {
			if line == "" {
				return false
			}
		}
		return true
	}

	result := ""

	// resultSnipped returns whether the result currently ends with a snip marker, by
	// us or by the author. Each line in result is terminated by a newline, so the
	// markers must be matched including that newline.
	resultSnipped := func() bool {
		return strings.HasSuffix(result, "[...]\n") || strings.HasSuffix(result, "[…]\n")
	}

	// Quick check for initial wrapped "On ... wrote:" line.
	if len(lines) > 3 && strings.HasPrefix(lines[0], "On ") && !strings.HasSuffix(lines[0], "wrote:") && strings.HasSuffix(lines[1], ":") && nextLineQuoted(1) {
		result = "[...]\n"
		lines = lines[3:]
		ensureLines()
	}

	for ; len(lines) > 0 && !isSignature(); ensureLines() {
		line := lines[0]
		if strings.HasPrefix(line, ">") {
			if !resultSnipped() {
				result += "[...]\n"
			}
			lines = lines[1:]
			continue
		}
		if line == "" {
			lines = lines[1:]
			continue
		}
		// Check for a "On <date>, <person> wrote:", we require digits before a quoted
		// line, with an optional empty line in between. If we don't have any text yet, we
		// don't require the digits.
		if strings.HasSuffix(line, ":") && (strings.ContainsAny(line, "0123456789") || result == "") && nextLineQuoted(0) {
			if !resultSnipped() {
				result += "[...]\n"
			}
			lines = lines[1:]
			continue
		}
		// Skip possibly duplicate snipping by author.
		if !isSnipped(line) || !resultSnipped() {
			result += line + "\n"
		}
		lines = lines[1:]
		if len(result) > 250 {
			break
		}
	}
	if len(result) > 250 {
		// Don't cut in the middle of a multi-byte UTF-8 character: if the byte at the
		// cut-off is a continuation byte (bits 10xxxxxx), back up to the start of that
		// character. A character has at most 3 continuation bytes, so we never back up
		// further than that, also not for binary data.
		end := 230
		for n := 0; n < 3 && result[end]&0xC0 == 0x80; n++ {
			end--
		}
		result = result[:end] + "..."
	}
	return result, scanner.Err()
}
200
201func parsedMessage(log mlog.Log, m store.Message, state *msgState, full, msgitem, msgitemHeaders bool) (pm ParsedMessage, rerr error) {
202 pm.ViewMode = store.ModeText // Valid default, in case this makes it to frontend.
203
204 if full || msgitem {
205 if !state.ensurePart(m, true) {
206 return pm, state.err
207 }
208 if full {
209 pm.Part = *state.part
210 }
211 } else {
212 if !state.ensurePart(m, false) {
213 return pm, state.err
214 }
215 }
216
217 // todo: we should store this form in message.Part, requires a data structure update.
218
219 convertAddrs := func(l []message.Address) []MessageAddress {
220 r := make([]MessageAddress, len(l))
221 for i, a := range l {
222 d, err := dns.ParseDomain(a.Host)
223 log.Check(err, "parsing domain")
224 if err != nil {
225 d = dns.Domain{ASCII: a.Host}
226 }
227 r[i] = MessageAddress{a.Name, a.User, d}
228 }
229 return r
230 }
231
232 if full || msgitem {
233 env := MessageEnvelope{}
234 if state.part.Envelope != nil {
235 e := *state.part.Envelope
236 env.Date = e.Date
237 env.Subject = e.Subject
238 env.InReplyTo = e.InReplyTo
239 env.MessageID = e.MessageID
240 env.From = convertAddrs(e.From)
241 env.Sender = convertAddrs(e.Sender)
242 env.ReplyTo = convertAddrs(e.ReplyTo)
243 env.To = convertAddrs(e.To)
244 env.CC = convertAddrs(e.CC)
245 env.BCC = convertAddrs(e.BCC)
246 }
247 pm.envelope = env
248 }
249
250 if (full || msgitemHeaders) && state.part.BodyOffset > 0 {
251 hdrs, err := state.part.Header()
252 if err != nil {
253 return ParsedMessage{}, fmt.Errorf("parsing headers: %w", err)
254 }
255 pm.Headers = hdrs
256
257 pm.ListReplyAddress = parseListPostAddress(hdrs.Get("List-Post"))
258 } else {
259 pm.Headers = map[string][]string{}
260 }
261
262 pm.Texts = []string{}
263
264 // We track attachments from multipart/mixed differently from other attachments.
265 // The others are often inline, sometimes just some logo's in HTML alternative
266 // messages. We want to have our mixed attachments at the start of the list, but
267 // our descent-first parsing would result in inline messages first in the typical
268 // message.
269 var attachmentsMixed []Attachment
270 var attachmentsOther []Attachment
271
272 addAttachment := func(a Attachment, isMixed bool) {
273 if isMixed {
274 attachmentsMixed = append(attachmentsMixed, a)
275 } else {
276 attachmentsOther = append(attachmentsOther, a)
277 }
278 }
279
280 // todo: how should we handle messages where a user prefers html, and we want to show it, but it's a DSN that also has textual-only parts? e.g. gmail's dsn where the first part is multipart/related with multipart/alternative, and second part is the regular message/delivery-status. we want to display both the html and the text.
281
282 var usePart func(p message.Part, index int, parent *message.Part, path []int, parentMixed bool)
283 usePart = func(p message.Part, index int, parent *message.Part, path []int, parentMixed bool) {
284 mt := p.MediaType + "/" + p.MediaSubType
285 newParentMixed := mt == "MULTIPART/MIXED"
286 for i, sp := range p.Parts {
287 if mt == "MULTIPART/SIGNED" && i >= 1 {
288 continue
289 }
290 usePart(sp, i, &p, append(append([]int{}, path...), i), newParentMixed)
291 }
292 switch mt {
293 case "TEXT/PLAIN", "/":
294 // Don't include if Content-Disposition attachment.
295 if full || msgitem {
296 disp, name, err := p.DispositionFilename()
297 if err != nil && errors.Is(err, message.ErrParamEncoding) {
298 log.Debugx("parsing disposition/filename", err)
299 } else if err != nil {
300 rerr = fmt.Errorf("reading disposition/filename: %v", err)
301 return
302 }
303 if strings.EqualFold(disp, "attachment") {
304 addAttachment(Attachment{path, name, p}, parentMixed)
305 return
306 }
307 }
308
309 if full {
310 buf, err := io.ReadAll(&moxio.LimitReader{R: p.ReaderUTF8OrBinary(), Limit: 2 * 1024 * 1024})
311 if err != nil {
312 rerr = fmt.Errorf("reading text part: %v", err)
313 return
314 }
315 pm.Texts = append(pm.Texts, string(buf))
316 }
317 if msgitem && pm.firstLine == "" {
318 pm.firstLine, rerr = formatFirstLine(p.ReaderUTF8OrBinary())
319 if rerr != nil {
320 rerr = fmt.Errorf("reading text for first line snippet: %v", rerr)
321 return
322 }
323 }
324
325 case "TEXT/HTML":
326 pm.HasHTML = true
327
328 default:
329 // todo: see if there is a common nesting messages that are both signed and encrypted.
330 if parent == nil && mt == "MULTIPART/SIGNED" {
331 pm.isSigned = true
332 }
333 if parent == nil && mt == "MULTIPART/ENCRYPTED" {
334 pm.isEncrypted = true
335 }
336 // todo: possibly do not include anything below multipart/alternative that starts with text/html, they may be cids. perhaps have a separate list of attachments for the text vs html version?
337 if p.MediaType != "MULTIPART" {
338 var parentct string
339 if parent != nil {
340 parentct = parent.MediaType + "/" + parent.MediaSubType
341 }
342
343 // Recognize DSNs.
344 if parentct == "MULTIPART/REPORT" && index == 1 && (mt == "MESSAGE/GLOBAL-DELIVERY-STATUS" || mt == "MESSAGE/DELIVERY-STATUS") {
345 if full {
346 buf, err := io.ReadAll(&moxio.LimitReader{R: p.ReaderUTF8OrBinary(), Limit: 1024 * 1024})
347 if err != nil {
348 rerr = fmt.Errorf("reading text part: %v", err)
349 return
350 }
351 pm.Texts = append(pm.Texts, string(buf))
352 }
353 return
354 }
355 if parentct == "MULTIPART/REPORT" && index == 2 && (mt == "MESSAGE/GLOBAL-HEADERS" || mt == "TEXT/RFC822-HEADERS") {
356 if full {
357 buf, err := io.ReadAll(&moxio.LimitReader{R: p.ReaderUTF8OrBinary(), Limit: 1024 * 1024})
358 if err != nil {
359 rerr = fmt.Errorf("reading text part: %v", err)
360 return
361 }
362 pm.Texts = append(pm.Texts, string(buf))
363 }
364 return
365 }
366 if parentct == "MULTIPART/REPORT" && index == 2 && (mt == "MESSAGE/GLOBAL" || mt == "TEXT/RFC822") {
367 addAttachment(Attachment{path, "original.eml", p}, parentMixed)
368 return
369 }
370
371 name := tryDecodeParam(log, p.ContentTypeParams["name"])
372 if name == "" && (full || msgitem) {
373 // todo: should have this, and perhaps all content-* headers, preparsed in message.Part?
374 h, err := p.Header()
375 log.Check(err, "parsing attachment headers", slog.Int64("msgid", m.ID))
376 cp := h.Get("Content-Disposition")
377 if cp != "" {
378 _, params, err := mime.ParseMediaType(cp)
379 log.Check(err, "parsing content-disposition", slog.String("cp", cp))
380 name = tryDecodeParam(log, params["filename"])
381 }
382 }
383 addAttachment(Attachment{path, name, p}, parentMixed)
384 }
385 }
386 }
387 usePart(*state.part, -1, nil, []int{}, false)
388
389 pm.attachments = []Attachment{}
390 pm.attachments = append(pm.attachments, attachmentsMixed...)
391 pm.attachments = append(pm.attachments, attachmentsOther...)
392
393 if rerr == nil {
394 pm.ID = m.ID
395 }
396 return
397}
398
399// parses List-Post header, returning an address if it could be found, and nil otherwise.
400func parseListPostAddress(s string) *MessageAddress {
401 /*
402 Examples:
403 List-Post: <mailto:list@host.com>
404 List-Post: <mailto:moderator@host.com> (Postings are Moderated)
405 List-Post: <mailto:moderator@host.com?subject=list%20posting>
406 List-Post: NO (posting not allowed on this list)
407 List-Post: <https://groups.google.com/group/golang-dev/post>, <mailto:golang-dev@googlegroups.com>
408 */
409 s = strings.TrimSpace(s)
410 for s != "" {
411 if !strings.HasPrefix(s, "<") {
412 return nil
413 }
414 addr, ns, found := strings.Cut(s[1:], ">")
415 if !found {
416 return nil
417 }
418 if strings.HasPrefix(addr, "mailto:") {
419 u, err := url.Parse(addr)
420 if err != nil {
421 return nil
422 }
423 addr, err := smtp.ParseAddress(u.Opaque)
424 if err != nil {
425 return nil
426 }
427 return &MessageAddress{User: addr.Localpart.String(), Domain: addr.Domain}
428 }
429 s = strings.TrimSpace(ns)
430 s = strings.TrimPrefix(s, ",")
431 s = strings.TrimSpace(s)
432 }
433 return nil
434}
435