1package webmail
2
3import (
4 "bufio"
5 "fmt"
6 "io"
7 "log/slog"
8 "mime"
9 "net/url"
10 "strings"
11
12 "golang.org/x/text/encoding/ianaindex"
13
14 "github.com/mjl-/mox/dns"
15 "github.com/mjl-/mox/message"
16 "github.com/mjl-/mox/mlog"
17 "github.com/mjl-/mox/mox-"
18 "github.com/mjl-/mox/moxio"
19 "github.com/mjl-/mox/smtp"
20 "github.com/mjl-/mox/store"
21)
22
23// todo: we should have all needed information for messageItem in store.Message (perhaps some data in message.Part) for fast access, not having to parse the on-disk message file.
24
25var wordDecoder = mime.WordDecoder{
26 CharsetReader: func(charset string, r io.Reader) (io.Reader, error) {
27 switch strings.ToLower(charset) {
28 case "", "us-ascii", "utf-8":
29 return r, nil
30 }
31 enc, _ := ianaindex.MIME.Encoding(charset)
32 if enc == nil {
33 enc, _ = ianaindex.IANA.Encoding(charset)
34 }
35 if enc == nil {
36 return r, fmt.Errorf("unknown charset %q", charset)
37 }
38 return enc.NewDecoder().Reader(r), nil
39 },
40}
41
42// Attempt q/b-word-decode name, coming from Content-Type "name" field or
43// Content-Disposition "filename" field.
44//
45// RFC 2231 specify an encoding for non-ascii values in mime header parameters. But
46// it appears common practice to instead just q/b-word encode the values.
47// Thunderbird and gmail.com do this for the Content-Type "name" parameter.
48// gmail.com also does that for the Content-Disposition "filename" parameter, where
49// Thunderbird uses the RFC 2231-defined encoding. Go's mime.ParseMediaType parses
50// the mechanism specified in RFC 2231 only. The value for "name" we get here would
51// already be decoded properly for standards-compliant headers, like
52// "filename*0*=UTF-8”%...; filename*1*=%.... We'll look for Q/B-word encoding
53// markers ("=?"-prefix or "?="-suffix) and try to decode if present. This would
54// only cause trouble for filenames having this prefix/suffix.
55func tryDecodeParam(log mlog.Log, name string) string {
56 if name == "" || !strings.HasPrefix(name, "=?") && !strings.HasSuffix(name, "?=") {
57 return name
58 }
59 // todo: find where this is allowed. it seems quite common. perhaps we should remove the pedantic check?
60 if mox.Pedantic {
61 log.Debug("attachment contains rfc2047 q/b-word-encoded mime parameter instead of rfc2231-encoded", slog.String("name", name))
62 return name
63 }
64 s, err := wordDecoder.DecodeHeader(name)
65 if err != nil {
66 log.Debugx("q/b-word decoding mime parameter", err, slog.String("name", name))
67 return name
68 }
69 return s
70}
71
72// todo: mime.FormatMediaType does not wrap long lines. should do it ourselves, and split header into several parts (if commonly supported).
73
74func messageItem(log mlog.Log, m store.Message, state *msgState) (MessageItem, error) {
75 pm, err := parsedMessage(log, m, state, false, true)
76 if err != nil {
77 return MessageItem{}, fmt.Errorf("parsing message %d for item: %v", m.ID, err)
78 }
79 // Clear largish unused data.
80 m.MsgPrefix = nil
81 m.ParsedBuf = nil
82 return MessageItem{m, pm.envelope, pm.attachments, pm.isSigned, pm.isEncrypted, pm.firstLine, true}, nil
83}
84
// formatFirstLine returns text the client can display next to the subject line
// in a mailbox. Quoted text, and any "On ... wrote:" line introducing it, is
// replaced with "[...]" so only new and useful information is displayed.
// Trailing signatures are not included.
//
// Lines are whitespace-trimmed and each is terminated with a newline. Once more
// than 250 bytes have been collected, the text is cut to at most 230 bytes,
// never splitting a multi-byte UTF-8 character, and "..." is appended.
//
// The returned error is the read error of the underlying scanner, if any. The
// text collected up to that point is returned along with it.
func formatFirstLine(r io.Reader) (string, error) {
	// We look quite a few lines ahead, to recognize trailing signatures that are
	// followed by empty lines.
	var lines []string
	scanner := bufio.NewScanner(r)
	ensureLines := func() {
		for len(lines) < 10 && scanner.Scan() {
			lines = append(lines, strings.TrimSpace(scanner.Text()))
		}
	}
	ensureLines()

	// isSnipped reports whether s is a marker authors use to indicate removed text.
	isSnipped := func(s string) bool {
		return s == "[...]" || s == "[…]" || s == "..."
	}

	// nextLineQuoted reports whether the line after index i, optionally preceded by
	// one empty line, is quoted text or a snip marker.
	nextLineQuoted := func(i int) bool {
		if i+1 < len(lines) && lines[i+1] == "" {
			i++
		}
		return i+1 < len(lines) && (strings.HasPrefix(lines[i+1], ">") || isSnipped(lines[i+1]))
	}

	// Remainder is signature if we see a line with only and minimum 2 dashes, and
	// there are no more empty lines (other than trailing ones), and fewer than 5
	// lines follow.
	isSignature := func() bool {
		// Lines are already trimmed, so a "-- " separator shows up as "--".
		if len(lines) == 0 || !strings.HasPrefix(lines[0], "--") || strings.Trim(lines[0], "-") != "" {
			return false
		}
		l := lines[1:]
		for len(l) > 0 && l[len(l)-1] == "" {
			l = l[:len(l)-1]
		}
		if len(l) >= 5 {
			return false
		}
		for _, line := range l {
			if line == "" {
				return false
			}
		}
		return true
	}

	result := ""

	// resultSnipped reports whether the last line in result is a snip marker. Each
	// line in result ends with a newline, so markers are matched including it.
	resultSnipped := func() bool {
		return strings.HasSuffix(result, "[...]\n") || strings.HasSuffix(result, "[…]\n")
	}

	// Quick check for initial wrapped "On ... wrote:" line.
	if len(lines) > 3 && strings.HasPrefix(lines[0], "On ") && !strings.HasSuffix(lines[0], "wrote:") && strings.HasSuffix(lines[1], ":") && nextLineQuoted(1) {
		result = "[...]\n"
		lines = lines[3:]
		ensureLines()
	}

	for ; len(lines) > 0 && !isSignature(); ensureLines() {
		line := lines[0]
		if strings.HasPrefix(line, ">") {
			if !resultSnipped() {
				result += "[...]\n"
			}
			lines = lines[1:]
			continue
		}
		if line == "" {
			lines = lines[1:]
			continue
		}
		// Check for a "On <date>, <person> wrote:", we require digits before a quoted
		// line, with an optional empty line in between. If we don't have any text yet, we
		// don't require the digits.
		if strings.HasSuffix(line, ":") && (strings.ContainsAny(line, "0123456789") || result == "") && nextLineQuoted(0) {
			if !resultSnipped() {
				result += "[...]\n"
			}
			lines = lines[1:]
			continue
		}
		// Skip possibly duplicate snipping by author.
		if !isSnipped(line) || !resultSnipped() {
			result += line + "\n"
		}
		lines = lines[1:]
		if len(result) > 250 {
			break
		}
	}
	if len(result) > 250 {
		// Cut at the last character boundary at or before byte 230. Ranging over a
		// string yields the start offset of each character, so we never slice in the
		// middle of a multi-byte UTF-8 sequence.
		cut := 0
		for i := range result {
			if i > 230 {
				break
			}
			cut = i
		}
		result = result[:cut] + "..."
	}
	return result, scanner.Err()
}
182
183func parsedMessage(log mlog.Log, m store.Message, state *msgState, full, msgitem bool) (pm ParsedMessage, rerr error) {
184 pm.ViewMode = store.ModeText // Valid default, in case this makes it to frontend.
185
186 if full || msgitem {
187 if !state.ensurePart(m, true) {
188 return pm, state.err
189 }
190 if full {
191 pm.Part = *state.part
192 }
193 } else {
194 if !state.ensurePart(m, false) {
195 return pm, state.err
196 }
197 }
198
199 // todo: we should store this form in message.Part, requires a data structure update.
200
201 convertAddrs := func(l []message.Address) []MessageAddress {
202 r := make([]MessageAddress, len(l))
203 for i, a := range l {
204 d, err := dns.ParseDomain(a.Host)
205 log.Check(err, "parsing domain")
206 if err != nil {
207 d = dns.Domain{ASCII: a.Host}
208 }
209 r[i] = MessageAddress{a.Name, a.User, d}
210 }
211 return r
212 }
213
214 if full || msgitem {
215 env := MessageEnvelope{}
216 if state.part.Envelope != nil {
217 e := *state.part.Envelope
218 env.Date = e.Date
219 env.Subject = e.Subject
220 env.InReplyTo = e.InReplyTo
221 env.MessageID = e.MessageID
222 env.From = convertAddrs(e.From)
223 env.Sender = convertAddrs(e.Sender)
224 env.ReplyTo = convertAddrs(e.ReplyTo)
225 env.To = convertAddrs(e.To)
226 env.CC = convertAddrs(e.CC)
227 env.BCC = convertAddrs(e.BCC)
228 }
229 pm.envelope = env
230 }
231
232 if full && state.part.BodyOffset > 0 {
233 hdrs, err := state.part.Header()
234 if err != nil {
235 return ParsedMessage{}, fmt.Errorf("parsing headers: %w", err)
236 }
237 pm.Headers = hdrs
238
239 pm.ListReplyAddress = parseListPostAddress(hdrs.Get("List-Post"))
240 } else {
241 pm.Headers = map[string][]string{}
242 }
243
244 pm.Texts = []string{}
245
246 // We track attachments from multipart/mixed differently from other attachments.
247 // The others are often inline, sometimes just some logo's in HTML alternative
248 // messages. We want to have our mixed attachments at the start of the list, but
249 // our descent-first parsing would result in inline messages first in the typical
250 // message.
251 var attachmentsMixed []Attachment
252 var attachmentsOther []Attachment
253
254 addAttachment := func(a Attachment, isMixed bool) {
255 if isMixed {
256 attachmentsMixed = append(attachmentsMixed, a)
257 } else {
258 attachmentsOther = append(attachmentsOther, a)
259 }
260 }
261
262 // todo: how should we handle messages where a user prefers html, and we want to show it, but it's a DSN that also has textual-only parts? e.g. gmail's dsn where the first part is multipart/related with multipart/alternative, and second part is the regular message/delivery-status. we want to display both the html and the text.
263
264 var usePart func(p message.Part, index int, parent *message.Part, path []int, parentMixed bool)
265 usePart = func(p message.Part, index int, parent *message.Part, path []int, parentMixed bool) {
266 mt := p.MediaType + "/" + p.MediaSubType
267 newParentMixed := mt == "MULTIPART/MIXED"
268 for i, sp := range p.Parts {
269 if mt == "MULTIPART/SIGNED" && i >= 1 {
270 continue
271 }
272 usePart(sp, i, &p, append(append([]int{}, path...), i), newParentMixed)
273 }
274 switch mt {
275 case "TEXT/PLAIN", "/":
276 // Don't include if Content-Disposition attachment.
277 if full || msgitem {
278 // todo: should have this, and perhaps all content-* headers, preparsed in message.Part?
279 h, err := p.Header()
280 log.Check(err, "parsing attachment headers", slog.Int64("msgid", m.ID))
281 cp := h.Get("Content-Disposition")
282 if cp != "" {
283 disp, params, err := mime.ParseMediaType(cp)
284 log.Check(err, "parsing content-disposition", slog.String("cp", cp))
285 if strings.EqualFold(disp, "attachment") {
286 name := tryDecodeParam(log, p.ContentTypeParams["name"])
287 if name == "" {
288 name = tryDecodeParam(log, params["filename"])
289 }
290 addAttachment(Attachment{path, name, p}, parentMixed)
291 return
292 }
293 }
294 }
295
296 if full {
297 buf, err := io.ReadAll(&moxio.LimitReader{R: p.ReaderUTF8OrBinary(), Limit: 2 * 1024 * 1024})
298 if err != nil {
299 rerr = fmt.Errorf("reading text part: %v", err)
300 return
301 }
302 pm.Texts = append(pm.Texts, string(buf))
303 }
304 if msgitem && pm.firstLine == "" {
305 pm.firstLine, rerr = formatFirstLine(p.ReaderUTF8OrBinary())
306 if rerr != nil {
307 rerr = fmt.Errorf("reading text for first line snippet: %v", rerr)
308 return
309 }
310 }
311
312 case "TEXT/HTML":
313 pm.HasHTML = true
314
315 default:
316 // todo: see if there is a common nesting messages that are both signed and encrypted.
317 if parent == nil && mt == "MULTIPART/SIGNED" {
318 pm.isSigned = true
319 }
320 if parent == nil && mt == "MULTIPART/ENCRYPTED" {
321 pm.isEncrypted = true
322 }
323 // todo: possibly do not include anything below multipart/alternative that starts with text/html, they may be cids. perhaps have a separate list of attachments for the text vs html version?
324 if p.MediaType != "MULTIPART" {
325 var parentct string
326 if parent != nil {
327 parentct = parent.MediaType + "/" + parent.MediaSubType
328 }
329
330 // Recognize DSNs.
331 if parentct == "MULTIPART/REPORT" && index == 1 && (mt == "MESSAGE/GLOBAL-DELIVERY-STATUS" || mt == "MESSAGE/DELIVERY-STATUS") {
332 if full {
333 buf, err := io.ReadAll(&moxio.LimitReader{R: p.ReaderUTF8OrBinary(), Limit: 1024 * 1024})
334 if err != nil {
335 rerr = fmt.Errorf("reading text part: %v", err)
336 return
337 }
338 pm.Texts = append(pm.Texts, string(buf))
339 }
340 return
341 }
342 if parentct == "MULTIPART/REPORT" && index == 2 && (mt == "MESSAGE/GLOBAL-HEADERS" || mt == "TEXT/RFC822-HEADERS") {
343 if full {
344 buf, err := io.ReadAll(&moxio.LimitReader{R: p.ReaderUTF8OrBinary(), Limit: 1024 * 1024})
345 if err != nil {
346 rerr = fmt.Errorf("reading text part: %v", err)
347 return
348 }
349 pm.Texts = append(pm.Texts, string(buf))
350 }
351 return
352 }
353 if parentct == "MULTIPART/REPORT" && index == 2 && (mt == "MESSAGE/GLOBAL" || mt == "TEXT/RFC822") {
354 addAttachment(Attachment{path, "original.eml", p}, parentMixed)
355 return
356 }
357
358 name := tryDecodeParam(log, p.ContentTypeParams["name"])
359 if name == "" && (full || msgitem) {
360 // todo: should have this, and perhaps all content-* headers, preparsed in message.Part?
361 h, err := p.Header()
362 log.Check(err, "parsing attachment headers", slog.Int64("msgid", m.ID))
363 cp := h.Get("Content-Disposition")
364 if cp != "" {
365 _, params, err := mime.ParseMediaType(cp)
366 log.Check(err, "parsing content-disposition", slog.String("cp", cp))
367 name = tryDecodeParam(log, params["filename"])
368 }
369 }
370 addAttachment(Attachment{path, name, p}, parentMixed)
371 }
372 }
373 }
374 usePart(*state.part, -1, nil, []int{}, false)
375
376 pm.attachments = []Attachment{}
377 pm.attachments = append(pm.attachments, attachmentsMixed...)
378 pm.attachments = append(pm.attachments, attachmentsOther...)
379
380 if rerr == nil {
381 pm.ID = m.ID
382 }
383 return
384}
385
386// parses List-Post header, returning an address if it could be found, and nil otherwise.
387func parseListPostAddress(s string) *MessageAddress {
388 /*
389 Examples:
390 List-Post: <mailto:list@host.com>
391 List-Post: <mailto:moderator@host.com> (Postings are Moderated)
392 List-Post: <mailto:moderator@host.com?subject=list%20posting>
393 List-Post: NO (posting not allowed on this list)
394 List-Post: <https://groups.google.com/group/golang-dev/post>, <mailto:golang-dev@googlegroups.com>
395 */
396 s = strings.TrimSpace(s)
397 for s != "" {
398 if !strings.HasPrefix(s, "<") {
399 return nil
400 }
401 addr, ns, found := strings.Cut(s[1:], ">")
402 if !found {
403 return nil
404 }
405 if strings.HasPrefix(addr, "mailto:") {
406 u, err := url.Parse(addr)
407 if err != nil {
408 return nil
409 }
410 addr, err := smtp.ParseAddress(u.Opaque)
411 if err != nil {
412 return nil
413 }
414 return &MessageAddress{User: addr.Localpart.String(), Domain: addr.Domain}
415 }
416 s = strings.TrimSpace(ns)
417 s = strings.TrimPrefix(s, ",")
418 s = strings.TrimSpace(s)
419 }
420 return nil
421}
422