Cloudflare一直是我非常喜欢的厂商,免费的套餐就可以提供不错的服务,只不过现在被玩坏了,国内访问速度大不如前,但是总体来说有还是比没有强。
不过免费版套餐的Cloudflare CDN有个非常致命的缺点,就是不能根据cookie来区分访客,从而针对性提供访问内容(比如不为登陆、评论过的用户提供缓存)。并且还会把已登录或已评论用户的信息缓存上去,这点难以接受,也是我抛弃Cloudflare的重要原因之一。
不过在Cloudflare Workers面世后,这个问题就迎刃而解了,我们可以使用Workers来绕过这个限制,自己编写缓存策略。
介绍
关于Cloudflare Workers的介绍可以自行搜索,通过搜索引擎看到本文人或许应该已经有所了解。
我简单阐述一下,本文要讲的内容,就是通过Cloudflare官方曾编写的一款脚本,实现自定义缓存策略。
Edge Cache HTML
Cloudflare官方曾编写的一款WordPress插件,用以搭配edge-cache-html,不过这个项目已经快两年没更新了,应该是觉得这个项目会影响到他们付费套餐的出售。
https://github.com/cloudflare/worker-examples/tree/master/examples/edge-cache-html
目前这个脚本直接使用,是无法在WordPress下生效的,并且缺失URL路径规则。我稍作修改,增加了路径排除和支持在WordPress下是缓存(去除header cache判断)。
前期操作请参考 [WordPress]利用 Cloudflare Workers 来缓存博客的 HTML 网页 ,写的非常详细,只不过这位博主没有没有缓存成功。
以下是修改后的脚本
// IMPORTANT: Either A Key/Value Namespace must be bound to this worker script // using the variable name EDGE_CACHE. or the API parameters below should be // configured. KV is recommended if possible since it can purge just the HTML // instead of the full cache.
// Default cookie prefixes for bypass const DEFAULT_BYPASS_COOKIES = [ "wp-", "wordpress", "comment_", "woocommerce_" ];
// URL paths to bypass the cache (each pattern is a regex) const BYPASS_URL_PATTERNS = [ /\/wp-admin\/.*/, /\/wp-adminlogin\/.*/ ];
/** * Main worker entry point. */ addEventListener("fetch", event => { const request = event.request; let upstreamCache = request.headers.get('x-HTML-Edge-Cache');
// Only process requests if KV store is set up and there is no // HTML edge cache in front of this worker (only the outermost cache // should handle HTML caching in case there are varying levels of support). let configured = false; if (typeof EDGE_CACHE !== 'undefined') { configured = true; } else if (CLOUDFLARE_API.email.length && CLOUDFLARE_API.key.length && CLOUDFLARE_API.zone.length) { configured = true; }
// Bypass processing of image requests (for everything except Firefox which doesn't use image/*) const accept = request.headers.get('Accept'); let isImage = false; if (accept && (accept.indexOf('image/*') !== -1)) { isImage = true; }
if (configured && !isImage && upstreamCache === null) { event.passThroughOnException(); event.respondWith(processRequest(request, event)); } });
/** * Process every request coming through to add the edge-cache header, * watch for purge responses and possibly cache HTML GET requests. * * @param {Request} originalRequest - Original request * @param {Event} event - Original event (for additional async waiting) */ async function processRequest(originalRequest, event) { let cfCacheStatus = null; const accept = originalRequest.headers.get('Accept'); const isHTML = (accept && accept.indexOf('text/html') >= 0); let {response, cacheVer, status, bypassCache} = await getCachedResponse(originalRequest);
if (response === null) { // Clone the request, add the edge-cache header and send it through. let request = new Request(originalRequest); request.headers.set('x-HTML-Edge-Cache', 'supports=cache|purgeall|bypass-cookies'); response = await fetch(request);
if (response) { const options = getResponseOptions(response); if (options && options.purge) { await purgeCache(cacheVer, event); status += ', Purged'; } bypassCache = bypassCache || shouldBypassEdgeCache(request, response); if ((!options || options.cache) && isHTML && originalRequest.method === 'GET' && response.status === 200 && !bypassCache) { status += await cacheResponse(cacheVer, originalRequest, response, event); } } } else { // If the origin didn't send the control header we will send the cached response but update // the cached copy asynchronously (stale-while-revalidate). This commonly happens with // a server-side disk cache that serves the HTML directly from disk. cfCacheStatus = 'HIT'; if (originalRequest.method === 'GET' && response.status === 200 && isHTML) { bypassCache = bypassCache || shouldBypassEdgeCache(originalRequest, response); if (!bypassCache) { const options = getResponseOptions(response); if (!options) { status += ', Refreshed'; event.waitUntil(updateCache(originalRequest, cacheVer, event)); } } } }
if (response && status !== null && originalRequest.method === 'GET' && response.status === 200 && isHTML) { response = new Response(response.body, response); response.headers.set('x-HTML-Edge-Cache-Status', status); if (cacheVer !== null) { response.headers.set('x-HTML-Edge-Cache-Version', cacheVer.toString()); } if (cfCacheStatus) { response.headers.set('CF-Cache-Status', cfCacheStatus); } }
return response; }
/** * Determine if the cache should be bypassed for the given request/response pair. * Specifically, if the request includes a cookie that the response flags for bypass. * Can be used on cache lookups to determine if the request needs to go to the origin and * origin responses to determine if they should be written to cache. * @param {Request} request - Request * @param {Response} response - Response * @returns {bool} true if the cache should be bypassed */ function shouldBypassEdgeCache(request, response) { let bypassCache = false;
// Bypass the cache for all requests to a URL that matches any of the URL path bypass patterns const url = new URL(request.url); const path = url.pathname + url.search; if (BYPASS_URL_PATTERNS.length) { for (let pattern of BYPASS_URL_PATTERNS) { if (path.match(pattern)) { bypassCache = true; break; } } }
if (request && response) { const options = getResponseOptions(response); const cookieHeader = request.headers.get('cookie'); let bypassCookies = DEFAULT_BYPASS_COOKIES; if (options) { bypassCookies = options.bypassCookies; } if (cookieHeader && cookieHeader.length && bypassCookies.length) { const cookies = cookieHeader.split(';'); for (let cookie of cookies) { // See if the cookie starts with any of the logged-in user prefixes for (let prefix of bypassCookies) { if (cookie.trim().startsWith(prefix)) { bypassCache = true; break; } } if (bypassCache) { break; } } } }
return bypassCache; }
const CACHE_HEADERS = ['Cache-Control', 'Expires', 'Pragma'];
/** * Check for cached HTML GET requests. * * @param {Request} request - Original request */ async function getCachedResponse(request) { let response = null; let cacheVer = null; let bypassCache = false; let status = 'Miss';
// Only check for HTML GET requests (saves on reading from KV unnecessarily) // and not when there are cache-control headers on the request (refresh) const accept = request.headers.get('Accept'); const cacheControl = request.headers.get('Cache-Control'); let noCache = false; // if (cacheControl && cacheControl.indexOf('no-cache') !== -1) { // noCache = true; // status = 'Bypass for Reload'; // } if (!noCache && request.method === 'GET' && accept && accept.indexOf('text/html') >= 0) { // Build the versioned URL for checking the cache cacheVer = await GetCurrentCacheVersion(cacheVer); const cacheKeyRequest = GenerateCacheRequest(request, cacheVer);
// See if there is a request match in the cache try { let cache = caches.default; let cachedResponse = await cache.match(cacheKeyRequest); if (cachedResponse) { // Copy Response object so that we can edit headers. cachedResponse = new Response(cachedResponse.body, cachedResponse);
// Check to see if the response needs to be bypassed because of a cookie bypassCache = shouldBypassEdgeCache(request, cachedResponse);
// Copy the original cache headers back and clean up any control headers if (bypassCache) { status = 'Bypass Cookie'; } else { status = 'Hit'; cachedResponse.headers.delete('Cache-Control'); cachedResponse.headers.delete('x-HTML-Edge-Cache-Status'); for (header of CACHE_HEADERS) { let value = cachedResponse.headers.get('x-HTML-Edge-Cache-Header-' + header); if (value) { cachedResponse.headers.delete('x-HTML-Edge-Cache-Header-' + header); cachedResponse.headers.set(header, value); } } response = cachedResponse; } } else { status = 'Miss'; }