package http

import (
	"compress/gzip"
	"encoding/base64"
	"errors"
	"fmt"
	"io"
	"io/fs"
	"log/slog"
	"net/http"
	"os"
	"path/filepath"
	"sort"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/mjl-/mox/mlog"
)

// todo: consider caching gzipped responses from forward handlers too. we would need to read the response (handle up to perhaps 2mb), hash the data (blake2b seems fast), check if we already have the gzip content for that hash, and cache it on the second request. we would keep entries around for not-yet-cached hashes, with some limit and an lru eviction policy. we have to recognize some content-types as not applicable and use direct streaming compression for those, e.g. text/event-stream. and we need to detect when a backend server is slowly sending out data and abort the caching attempt. the downside is that we always need to read and hash the whole response before we can send ours. it is best if the backend just responds with gzip itself though. compression needs more cpu than hashing (at least 10x), but it's only worth it with enough cache hits.

// Cache for gzipped static files.
var staticgzcache gzcache

type gzcache struct {
	dir string // Where all files are stored.

	// Max total size of combined files in cache. When adding a new entry, the least
	// recently used entries are evicted to stay below this size.
	maxSize int64

	sync.Mutex

	// Total on-disk size of compressed data. Not larger than maxSize. We can
	// temporarily have more bytes in use because while/after evicting, a writer may
	// still have the old, removed file open.
	size int64

	// Indexed by effective path, based on handler.
	paths map[string]gzfile

	// LRU list, containing only files whose compression has completed, ordered by
	// atime, oldest first. We evict from the oldest end. On use, we unlink an entry
	// and put it back at the newest end.
	oldest, newest *pathUse
}

type gzfile struct {
	// Whether compression is in progress. If a new request comes in while we are
	// already compressing, we simply compress again for that client, keeping the
	// code simple.
	compressing bool

	mtime  int64    // If mtime changes, we remove the entry from the cache.
	atime  int64    // For LRU.
	gzsize int64    // Compressed size, used in the Content-Length header.
	use    *pathUse // Only set after compressing finished.
}

type pathUse struct {
	prev, next *pathUse // Doubly-linked list; next points toward newer entries, prev toward older.
	path       string
}

// Initialize staticgzcache from the on-disk directory.
// The path and mtime are encoded in the filename; the atime comes from the
// file's access time on disk.
func loadStaticGzipCache(dir string, maxSize int64) {
	staticgzcache = gzcache{
		dir:     dir,
		maxSize: maxSize,
		paths:   map[string]gzfile{},
	}

	// todo future: should we split cached files into subdirectories, so we don't end up with one huge directory?
	// Ignore any error here; a real problem will show up when reading the directory below.
	os.MkdirAll(dir, 0700)
	entries, err := os.ReadDir(dir)
	if err != nil && !os.IsNotExist(err) {
		pkglog.Errorx("listing static gzip cache files", err, slog.String("dir", dir))
	}
	for _, e := range entries {
		name := e.Name()
		var err error
		if !strings.HasSuffix(name, ".gz") {
			err = errors.New("missing .gz suffix")
		}
		var path, xpath, mtimestr string
		if err == nil {
			var ok bool
			// TrimSuffix, not TrimRight: TrimRight would strip any trailing '.', 'g' or 'z' characters.
			xpath, mtimestr, ok = strings.Cut(strings.TrimSuffix(name, ".gz"), "+")
			if !ok {
				err = fmt.Errorf("missing + in filename")
			}
		}
		if err == nil {
			var pathbuf []byte
			pathbuf, err = base64.RawURLEncoding.DecodeString(xpath)
			if err == nil {
				path = string(pathbuf)
			}
		}
		var mtime int64
		if err == nil {
			mtime, err = strconv.ParseInt(mtimestr, 16, 64)
		}
		var fi fs.FileInfo
		if err == nil {
			fi, err = e.Info()
		}
		var atime int64
		if err == nil {
			atime, err = statAtime(fi.Sys())
		}
		if err != nil {
			pkglog.Infox("removing unusable/unrecognized file in static gzip cache dir", err)
			xerr := os.Remove(filepath.Join(dir, name))
			pkglog.Check(xerr, "removing unusable file in static gzip cache dir",
				slog.Any("error", err),
				slog.String("dir", dir),
				slog.String("filename", name))
			continue
		}
		staticgzcache.paths[path] = gzfile{
			mtime:  mtime,
			atime:  atime,
			gzsize: fi.Size(),
			use:    &pathUse{path: path},
		}
		staticgzcache.size += fi.Size()
	}

	pathatimes := make([]struct {
		path  string
		atime int64
	}, len(staticgzcache.paths))
	i := 0
	for k, gf := range staticgzcache.paths {
		pathatimes[i].path = k
		pathatimes[i].atime = gf.atime
		i++
	}
	sort.Slice(pathatimes, func(i, j int) bool {
		return pathatimes[i].atime < pathatimes[j].atime
	})
	for _, pa := range pathatimes {
		staticgzcache.push(staticgzcache.paths[pa.path].use)
	}

	// Ensure cache size is OK for current config.
	staticgzcache.evictFor(0)
}
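
// A hypothetical example of initializing the cache at startup. The directory
// and the 64 MB limit are illustrative values, not mox's actual configuration:
//
//	loadStaticGzipCache(filepath.Join(dataDir, "staticgzcache"), 64*1024*1024)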

// Evict entries until size more bytes fit within maxSize.
// Must be called with the lock held.
func (c *gzcache) evictFor(size int64) {
	for c.size+size > c.maxSize && c.oldest != nil {
		c.evictPath(c.oldest.path)
	}
}

// evictPath removes path from the cache.
// Must be called with the lock held.
func (c *gzcache) evictPath(path string) {
	gf := c.paths[path]

	delete(c.paths, path)
	c.unlink(gf.use)
	c.size -= gf.gzsize
	err := os.Remove(staticCachePath(c.dir, path, gf.mtime))
	pkglog.Check(err, "removing cached gzipped static file", slog.String("path", path))
}

// Open the cached file for path, requiring that it has mtime. If there is no
// usable cached file, a nil file is returned and the caller should compress and
// add to the cache with startPath and finishPath. No usable cached file means the
// path isn't in the cache, or its mtime is different, or the entry is new and
// still being compressed. If a usable cached file was found, it is opened and
// returned, along with its compressed/on-disk size.
func (c *gzcache) openPath(path string, mtime int64) (*os.File, int64) {
	c.Lock()
	defer c.Unlock()

	gf, ok := c.paths[path]
	if !ok || gf.compressing {
		return nil, 0
	}
	if gf.mtime != mtime {
		// File has changed, remove the old entry. The caller will add to the cache again.
		c.evictPath(path)
		return nil, 0
	}

	p := staticCachePath(c.dir, path, gf.mtime)
	f, err := os.Open(p)
	if err != nil {
		pkglog.Errorx("open static cached gzip file, removing from cache", err, slog.String("path", path))
		// Perhaps someone removed the file? Remove from cache, it will be recreated.
		c.evictPath(path)
		return nil, 0
	}

	gf.atime = time.Now().UnixNano()
	c.unlink(gf.use)
	c.push(gf.use)
	c.paths[path] = gf

	return f, gf.gzsize
}

// startPath attempts to add an entry to the cache for a new cached compressed
// file. If there is already an entry but it isn't done compressing yet, false is
// returned and the caller can still compress and respond but the entry cannot be
// added to the cache. If the entry is being added, the caller must call finishPath
// or abortPath.
func (c *gzcache) startPath(path string, mtime int64) bool {
	c.Lock()
	defer c.Unlock()

	if _, ok := c.paths[path]; ok {
		return false
	}
	// note: no "use" yet, we only set that when we finish, so we don't have to clean up on abort.
	c.paths[path] = gzfile{compressing: true, mtime: mtime}
	return true
}

// finishPath completes adding an entry to the cache, marking the entry as
// compressed, accounting for its size, and marking its atime.
func (c *gzcache) finishPath(path string, gzsize int64) {
	c.Lock()
	defer c.Unlock()

	c.evictFor(gzsize)

	gf := c.paths[path]
	gf.compressing = false
	gf.gzsize = gzsize
	gf.atime = time.Now().UnixNano()
	gf.use = &pathUse{path: path}
	c.paths[path] = gf
	c.size += gzsize
	c.push(gf.use)
}

// abortPath marks an entry as no longer being added to the cache.
func (c *gzcache) abortPath(path string) {
	c.Lock()
	defer c.Unlock()

	delete(c.paths, path)
	// note: gzfile.use isn't set yet.
}
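
// A minimal sketch of the intended calling protocol for the methods above, with
// a hypothetical caller and compress callback; it is not the actual handler code
// (for that, see staticgzcacheReplacer.WriteHeader below).
func exampleCacheLifecycle(path string, mtime int64, compress func() (gzsize int64, err error)) {
	if f, _ := staticgzcache.openPath(path, mtime); f != nil {
		// Serve the cached gzipped data from f, then close it.
		f.Close()
		return
	}
	if !staticgzcache.startPath(path, mtime) {
		// Another request is already compressing this path. Compress for this
		// response only, without adding to the cache.
		return
	}
	// compress is assumed to write the gzipped data to
	// staticCachePath(staticgzcache.dir, path, mtime) and return its size.
	gzsize, err := compress()
	if err != nil {
		staticgzcache.abortPath(path)
		return
	}
	staticgzcache.finishPath(path, gzsize)
}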

// push moves the "pathUse" to the newest end of the LRU doubly-linked list,
// unlinking it first if needed.
func (c *gzcache) push(u *pathUse) {
	c.unlink(u)
	u.prev = c.newest
	if c.newest != nil {
		c.newest.next = u
	}
	if c.oldest == nil {
		c.oldest = u
	}
	c.newest = u
}

// unlink removes the "pathUse" from the LRU doubly-linked list.
func (c *gzcache) unlink(u *pathUse) {
	if c.oldest == u {
		c.oldest = u.next
	}
	if c.newest == u {
		c.newest = u.prev
	}
	if u.prev != nil {
		u.prev.next = u.next
	}
	if u.next != nil {
		u.next.prev = u.prev
	}
	u.prev = nil
	u.next = nil
}
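
// An illustrative helper, not used by the cache itself, showing how push and
// unlink maintain LRU order: pushing an already-linked entry moves it to the
// newest end.
func exampleLRUOrder() []string {
	var c gzcache
	a := &pathUse{path: "a"}
	b := &pathUse{path: "b"}
	c.push(a)
	c.push(b)
	c.push(a) // "a" was used again and becomes newest; "b" is now oldest.
	var order []string
	for u := c.oldest; u != nil; u = u.next {
		order = append(order, u.path)
	}
	return order // []string{"b", "a"}
}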

// Return path to the on-disk gzipped cached file.
func staticCachePath(dir, path string, mtime int64) string {
	p := base64.RawURLEncoding.EncodeToString([]byte(path))
	return filepath.Join(dir, fmt.Sprintf("%s+%x.gz", p, mtime))
}
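
// For illustration, with hypothetical values: a request path "/static/app.js"
// with mtime 1 (real mtimes are unix nanoseconds, formatted as hex) becomes:
//
//	L3N0YXRpYy9hcHAuanM+1.gz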

// staticgzcacheReplacer intercepts responses for cacheable static files,
// responding with the cached gzipped content if appropriate and failing further
// writes so the regular response writer stops.
type staticgzcacheReplacer struct {
	w            http.ResponseWriter
	r            *http.Request // For its context, and for logging.
	uncomprPath  string
	uncomprFile  *os.File
	uncomprMtime time.Time
	uncomprSize  int64

	statusCode int

	// Set during WriteHeader to indicate that a compressed file has been written.
	// Further Writes result in an error, stopping the writer of the uncompressed
	// content.
	handled bool
}

func (w *staticgzcacheReplacer) logger() mlog.Log {
	return pkglog.WithContext(w.r.Context())
}

// Header returns the header of the underlying ResponseWriter.
func (w *staticgzcacheReplacer) Header() http.Header {
	return w.w.Header()
}

// WriteHeader checks whether the response is eligible for compressing. If not,
// WriteHeader on the underlying ResponseWriter is called. If so, headers for gzip
// content are set and the gzip content is written, either from disk or compressed
// and stored in the cache.
func (w *staticgzcacheReplacer) WriteHeader(statusCode int) {
	if w.statusCode != 0 {
		return
	}
	w.statusCode = statusCode
	if statusCode != http.StatusOK {
		w.w.WriteHeader(statusCode)
		return
	}

	gzf, gzsize := staticgzcache.openPath(w.uncomprPath, w.uncomprMtime.UnixNano())
	if gzf == nil {
		// Not in cache, or work in progress.
		started := staticgzcache.startPath(w.uncomprPath, w.uncomprMtime.UnixNano())
		if !started {
			// Another request is already compressing and storing this file.
			// todo: we should just wait for the other compression to finish, then use its result.
			w.w.(*loggingWriter).UncompressedSize = w.uncomprSize
			h := w.w.Header()
			h.Set("Content-Encoding", "gzip")
			h.Del("Content-Length") // We don't know the length up front, we compress while streaming.
			gzw, _ := gzip.NewWriterLevel(w.w, gzip.BestSpeed)
			_, err := io.Copy(gzw, w.uncomprFile)
			if err == nil {
				err = gzw.Close()
			}
			w.handled = true
			if err != nil {
				w.w.(*loggingWriter).error(err)
			}
			return
		}

		// Compress and write to cache.
		p := staticCachePath(staticgzcache.dir, w.uncomprPath, w.uncomprMtime.UnixNano())
		ngzf, err := os.OpenFile(p, os.O_CREATE|os.O_EXCL|os.O_RDWR, 0600)
		if err != nil {
			w.logger().Errorx("create new static gzip cache file", err, slog.String("requestpath", w.uncomprPath), slog.String("fspath", p))
			staticgzcache.abortPath(w.uncomprPath)
			return
		}
		defer func() {
			if ngzf != nil {
				staticgzcache.abortPath(w.uncomprPath)
				err := ngzf.Close()
				w.logger().Check(err, "closing failed static gzip cache file", slog.String("requestpath", w.uncomprPath), slog.String("fspath", p))
				err = os.Remove(p)
				w.logger().Check(err, "removing failed static gzip cache file", slog.String("requestpath", w.uncomprPath), slog.String("fspath", p))
			}
		}()

		gzw := gzip.NewWriter(ngzf)
		_, err = io.Copy(gzw, w.uncomprFile)
		if err == nil {
			err = gzw.Close()
		}
		if err == nil {
			err = ngzf.Sync()
		}
		if err == nil {
			gzsize, err = ngzf.Seek(0, io.SeekCurrent)
		}
		if err == nil {
			_, err = ngzf.Seek(0, io.SeekStart)
		}
		if err != nil {
			w.w.(*loggingWriter).error(err)
			return
		}
		staticgzcache.finishPath(w.uncomprPath, gzsize)
		gzf = ngzf
		ngzf = nil
	}
	defer func() {
		if gzf != nil {
			err := gzf.Close()
			if err != nil {
				w.logger().Errorx("closing static gzip cache file", err)
			}
		}
	}()

	// Signal to Write that we already (attempted to) write the response.
	w.handled = true

	w.w.(*loggingWriter).UncompressedSize = w.uncomprSize
	h := w.w.Header()
	h.Set("Content-Encoding", "gzip")
	h.Set("Content-Length", fmt.Sprintf("%d", gzsize))
	w.w.WriteHeader(statusCode)
	if _, err := io.Copy(w.w, gzf); err != nil {
		w.w.(*loggingWriter).error(err)
	}
}

var errHandledCompressed = errors.New("response written with compression")

func (w *staticgzcacheReplacer) Write(buf []byte) (int, error) {
	if w.statusCode == 0 {
		w.WriteHeader(http.StatusOK)
	}
	if w.handled {
		// For 200 OK, we already wrote the response and just want the caller to stop processing.
		return 0, errHandledCompressed
	}
	return w.w.Write(buf)
}
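
// A sketch of how this type is meant to be used when serving a static file; the
// surrounding handler code and the fspath/f/fi names are assumptions, not shown
// here:
//
//	wf := &staticgzcacheReplacer{w, r, fspath, f, fi.ModTime(), fi.Size(), 0, false}
//	http.ServeContent(wf, r, r.URL.Path, fi.ModTime(), f)
//
// ServeContent's write of the uncompressed body triggers WriteHeader above; once
// the gzipped response has been written, further writes fail with
// errHandledCompressed, which the caller should treat as success.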