乐闻世界logo
搜索文章和话题

Puppeteer 如何管理 Cookie 和存储?如何实现会话持久化和多账户管理?

2月19日 19:39

Puppeteer 提供了强大的 Cookie 和存储管理功能,可以模拟真实的用户会话、保持登录状态、管理本地存储等。

1. Cookie 管理

获取当前页面 URL 下的所有 Cookie:

javascript
// Fetch the cookies that apply to the page's current URL, then print them.
const cookies = await page.cookies();
console.log(cookies);

获取特定 URL 的 Cookie:

javascript
// Ask for the cookies that apply to one explicit URL.
const cookies = await page.cookies(
  'https://example.com',
);

设置 Cookie:

javascript
// Set a single cookie with explicit scope, lifetime and security attributes.
await page.setCookie({
  name: 'session_id',
  value: 'abc123',
  domain: '.example.com',
  path: '/',
  // `expires` is a Unix timestamp in seconds; this one lapses in one hour.
  expires: Math.floor(Date.now() / 1000) + 3600,
  httpOnly: true,
  secure: true,
  sameSite: 'Lax',
});

设置多个 Cookie:

javascript
// setCookie is variadic: pass one object per cookie to set several at once.
await page.setCookie(
  { name: 'cookie1', value: 'value1', domain: '.example.com' },
  { name: 'cookie2', value: 'value2', domain: '.example.com' },
);

删除 Cookie:

javascript
// Delete one specific cookie, identified by its name and domain.
await page.deleteCookie({ name: 'session_id', domain: '.example.com' });

// Delete every cookie the page currently reports.
const cookies = await page.cookies();
await page.deleteCookie(...cookies);

清除所有 Cookie(通过页面脚本操作 document.cookie;无法清除 HttpOnly Cookie,且只对当前页面域下 path 为 / 的 Cookie 生效):

javascript
// Expire each cookie that page script can read by rewriting it with an
// expiry of "now". HttpOnly cookies are not exposed through document.cookie,
// so this loop cannot reach them.
await page.evaluate(() => {
  for (const rawCookie of document.cookie.split(";")) {
    const trimmed = rawCookie.replace(/^ +/, "");
    const expiredSuffix = "=;expires=" + new Date().toUTCString() + ";path=/";
    document.cookie = trimmed.replace(/=.*/, expiredSuffix);
  }
});

2. LocalStorage 管理

获取 LocalStorage 数据:

javascript
// Copy every localStorage key/value pair of the page into a plain object.
const localStorageData = await page.evaluate(() => {
  const snapshot = {};
  let index = 0;
  while (index < localStorage.length) {
    const name = localStorage.key(index);
    snapshot[name] = localStorage.getItem(name);
    index += 1;
  }
  return snapshot;
});

设置 LocalStorage 数据:

javascript
// Write two entries into the page's localStorage; objects must be
// serialized because storage values are strings.
await page.evaluate(() => {
  localStorage.setItem('user_id', '12345');
  const serializedPreferences = JSON.stringify({ theme: 'dark' });
  localStorage.setItem('preferences', serializedPreferences);
});

获取特定 LocalStorage 项:

javascript
// Read a single localStorage entry from the page.
const userId = await page.evaluate(() => localStorage.getItem('user_id'));

删除 LocalStorage 项:

javascript
// Remove a single localStorage entry from the page.
await page.evaluate(() => localStorage.removeItem('user_id'));

清除所有 LocalStorage:

javascript
// Drop every localStorage entry of the page's origin.
await page.evaluate(() => localStorage.clear());

3. SessionStorage 管理

获取 SessionStorage 数据:

javascript
// Copy every sessionStorage key/value pair of the page into a plain object.
const sessionStorageData = await page.evaluate(() => {
  const snapshot = {};
  let index = 0;
  while (index < sessionStorage.length) {
    const name = sessionStorage.key(index);
    snapshot[name] = sessionStorage.getItem(name);
    index += 1;
  }
  return snapshot;
});

设置 SessionStorage 数据:

javascript
// Write one entry into the page's sessionStorage.
await page.evaluate(() => sessionStorage.setItem('temp_data', 'temporary_value'));

清除所有 SessionStorage:

javascript
// Drop every sessionStorage entry of the page.
await page.evaluate(() => sessionStorage.clear());

4. IndexedDB 管理

获取 IndexedDB 数据:

javascript
// Read every record from the `myStore` object store of `myDatabase`
// (opened at version 1) inside the page and return them to Node.
const indexedDBData = await page.evaluate(
  () =>
    new Promise((resolve, reject) => {
      const request = indexedDB.open('myDatabase', 1);

      request.onerror = () => {
        reject(request.error);
      };

      request.onsuccess = () => {
        const db = request.result;
        try {
          // db.transaction() throws synchronously (for example when the
          // store does not exist). Without this try/catch the exception is
          // lost inside the event handler and the promise never settles.
          const store = db
            .transaction(['myStore'], 'readonly')
            .objectStore('myStore');
          const getAllRequest = store.getAll();

          getAllRequest.onsuccess = () => {
            db.close();
            resolve(getAllRequest.result);
          };
          getAllRequest.onerror = () => {
            db.close();
            reject(getAllRequest.error);
          };
        } catch (error) {
          db.close();
          reject(error);
        }
      };
    }),
);

5. 浏览器上下文和隔离

使用 Incognito 上下文:

javascript
// Create a browser context whose cookies and storage are separate from the
// default context.
// NOTE(review): newer Puppeteer releases (v22+) expose this as
// browser.createBrowserContext() — confirm against the installed version.
const context = await browser.createIncognitoBrowserContext();
const page = await context.newPage();

// Work inside the isolated environment.
await page.goto('https://example.com');

// Closing the context discards all of its data.
await context.close();

多个隔离上下文:

javascript
// Create several contexts; cookies and storage are fully isolated between
// context1 and context2.
// NOTE(review): newer Puppeteer releases (v22+) expose this as
// browser.createBrowserContext() — confirm against the installed version.
const context1 = await browser.createIncognitoBrowserContext();
const context2 = await browser.createIncognitoBrowserContext();

const page1 = await context1.newPage();
const page2 = await context2.newPage();

6. 会话持久化

保存会话状态:

javascript
/**
 * Persist the page's cookies, localStorage contents and current URL to a
 * JSON file so the session can be restored later.
 *
 * @param {object} page - Puppeteer page; `cookies()`, `evaluate()` and `url()` are used.
 * @param {string} filePath - Destination path of the session JSON file.
 * @returns {Promise<void>} Resolves once the file has been written.
 */
async function saveSession(page, filePath) {
  const cookies = await page.cookies();

  // The callback executes inside the browser, where `localStorage` is the
  // page's storage. The Node-side result gets a distinct name so it does not
  // shadow that global.
  const storedItems = await page.evaluate(() => {
    const data = {};
    for (let i = 0; i < localStorage.length; i++) {
      const key = localStorage.key(i);
      data[key] = localStorage.getItem(key);
    }
    return data;
  });

  const session = {
    cookies,
    localStorage: storedItems,
    url: page.url(),
    timestamp: Date.now(),
  };

  // Dynamic import works from both CommonJS and ES modules.
  const { writeFile } = await import('node:fs/promises');

  // The file holds live credentials: create it readable by the owner only
  // (the mode is applied when the file is created).
  await writeFile(filePath, JSON.stringify(session, null, 2), { mode: 0o600 });
}

恢复会话状态:

javascript
/**
 * Restore a session previously written by `saveSession`.
 *
 * Cookies are set first, then the page navigates to the saved URL, and only
 * then is localStorage repopulated: localStorage is scoped to an origin, so
 * writing it before navigation would target whatever document the page
 * currently shows (or fail on a blank page).
 *
 * @param {object} page - Puppeteer page to restore into.
 * @param {string} filePath - Path of the session JSON file.
 * @returns {Promise<void>} Resolves when the page is loaded with the restored state.
 */
async function restoreSession(page, filePath) {
  // Dynamic import works from both CommonJS and ES modules.
  const { readFile } = await import('node:fs/promises');
  const session = JSON.parse(await readFile(filePath, 'utf8'));

  const cookies = session.cookies ?? [];
  if (cookies.length > 0) {
    await page.setCookie(...cookies);
  }

  // Navigate before touching localStorage so the writes land on the right origin.
  await page.goto(session.url);

  const storedItems = session.localStorage ?? {};
  if (Object.keys(storedItems).length > 0) {
    await page.evaluate((data) => {
      for (const [key, value] of Object.entries(data)) {
        localStorage.setItem(key, value);
      }
    }, storedItems);

    // Reload so the page's own scripts start up with the restored storage.
    await page.reload();
  }
}

7. 实际应用场景

场景 1:保持登录状态

javascript
/**
 * Log in through the login form and persist the resulting session to
 * `session.json`.
 *
 * @returns {Promise<void>}
 */
async function loginAndSaveSession() {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();

    // Log in.
    await page.goto('https://example.com/login');
    await page.type('#username', 'user@example.com');
    await page.type('#password', 'password');

    // Start waiting for the navigation before clicking; otherwise a fast
    // navigation can finish before waitForNavigation() is registered.
    await Promise.all([
      page.waitForNavigation(),
      page.click('#login-button'),
    ]);

    // Save the session.
    await saveSession(page, 'session.json');
  } finally {
    // Close the browser even when a step above fails.
    await browser.close();
  }
}

/**
 * Restore the session from `session.json`, open the dashboard and log
 * whether the `.user-profile` element is present.
 *
 * @returns {Promise<void>}
 */
async function useSavedSession() {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();

    // Restore the session.
    await restoreSession(page, 'session.json');

    // Go straight to a page that requires login.
    await page.goto('https://example.com/dashboard');

    // The profile element is used as the "logged in" marker.
    const profileElement = await page.$('.user-profile');
    const isLoggedIn = profileElement !== null;
    console.log('Is logged in:', isLoggedIn);
  } finally {
    await browser.close();
  }
}

场景 2:多账户管理

javascript
/**
 * Log in to each account in its own isolated browser context, read the
 * `.user-data` text from the dashboard and log it.
 *
 * @param {Array<{username: string, password: string}>} accounts - Credentials to process, one after another.
 * @returns {Promise<void>}
 */
async function manageMultipleAccounts(accounts) {
  const browser = await puppeteer.launch();
  try {
    for (const account of accounts) {
      // One isolated context per account, so cookies and storage never mix.
      // NOTE(review): newer Puppeteer releases (v22+) expose this as
      // browser.createBrowserContext() — confirm against the installed version.
      const context = await browser.createIncognitoBrowserContext();
      try {
        const page = await context.newPage();

        // Log in.
        await page.goto('https://example.com/login');
        await page.type('#username', account.username);
        await page.type('#password', account.password);

        // Register the navigation wait before clicking to avoid missing a
        // navigation that completes quickly.
        await Promise.all([
          page.waitForNavigation(),
          page.click('#login-button'),
        ]);

        // Perform the per-account work.
        await page.goto('https://example.com/dashboard');
        const data = await page.evaluate(
          // Yields null instead of throwing when the element is absent.
          () => document.querySelector('.user-data')?.textContent ?? null,
        );

        console.log(`Account ${account.username}: ${data}`);
      } finally {
        // Closing the context discards its data, even after a failure.
        await context.close();
      }
    }
  } finally {
    await browser.close();
  }
}

manageMultipleAccounts([
  { username: 'user1@example.com', password: 'pass1' },
  { username: 'user2@example.com', password: 'pass2' },
]).catch((error) => {
  // Surface failures instead of leaving the promise unhandled.
  console.error('manageMultipleAccounts failed:', error);
});

场景 3:A/B 测试

javascript
/**
 * Load `url` once per variant, each time in a fresh isolated context with an
 * `ab_test_variant` cookie set to the variant id, and log the page title and
 * `.content` text.
 *
 * @param {string} url - Absolute URL of the page under test.
 * @param {Array<{id: string}>} variants - Variants to visit, one after another.
 * @returns {Promise<void>}
 */
async function abTesting(url, variants) {
  const cookieDomain = new URL(url).hostname;
  const browser = await puppeteer.launch();
  try {
    for (const variant of variants) {
      // NOTE(review): newer Puppeteer releases (v22+) expose this as
      // browser.createBrowserContext() — confirm against the installed version.
      const context = await browser.createIncognitoBrowserContext();
      try {
        const page = await context.newPage();

        // Select the variant via cookie before the first navigation.
        await page.setCookie({
          name: 'ab_test_variant',
          value: variant.id,
          domain: cookieDomain,
        });

        await page.goto(url);

        // Collect the data of interest from the rendered page.
        const data = await page.evaluate(() => ({
          title: document.title,
          content: document.querySelector('.content')?.textContent,
        }));

        console.log(`Variant ${variant.id}:`, data);
      } finally {
        // Release the context even when a step above fails.
        await context.close();
      }
    }
  } finally {
    await browser.close();
  }
}

abTesting('https://example.com', [
  { id: 'A' },
  { id: 'B' },
]).catch((error) => {
  // Surface failures instead of leaving the promise unhandled.
  console.error('abTesting failed:', error);
});

场景 4:购物车持久化

javascript
/**
 * Read the `cart` entry from the page's localStorage and write it to
 * `carts/<userId>.json` (relative to the current working directory).
 *
 * @param {object} page - Puppeteer page whose localStorage holds the cart.
 * @param {string} userId - Used verbatim as the file name.
 *   NOTE(review): not sanitized here; callers should pass trusted ids only.
 * @returns {Promise<void>}
 */
async function saveShoppingCart(page, userId) {
  const cartData = await page.evaluate(
    // A missing entry is treated as an empty cart.
    () => JSON.parse(localStorage.getItem('cart') || '[]'),
  );

  // Dynamic import works from both CommonJS and ES modules.
  const { mkdir, writeFile } = await import('node:fs/promises');

  // Writing would fail with ENOENT if the directory did not exist yet.
  await mkdir('carts', { recursive: true });

  const filePath = `carts/${userId}.json`;
  await writeFile(filePath, JSON.stringify(cartData, null, 2));
}

/**
 * Load `carts/<userId>.json`, if it exists, and store its contents under the
 * `cart` key of the page's localStorage. Does nothing when no file was saved.
 *
 * @param {object} page - Puppeteer page to write the cart into.
 * @param {string} userId - Used verbatim as the file name.
 * @returns {Promise<void>}
 */
async function restoreShoppingCart(page, userId) {
  const { readFile } = await import('node:fs/promises');
  const filePath = `carts/${userId}.json`;

  let fileContents;
  try {
    fileContents = await readFile(filePath, 'utf8');
  } catch (error) {
    // No saved cart for this user: nothing to restore.
    if (error.code === 'ENOENT') {
      return;
    }
    throw error;
  }

  const cartData = JSON.parse(fileContents);
  await page.evaluate((data) => {
    localStorage.setItem('cart', JSON.stringify(data));
  }, cartData);
}

8. 安全注意事项

1. 敏感数据保护:

javascript
// Never hard-code sensitive values in source; read them from the
// environment instead.
const password = process.env.PASSWORD;

// Session files contain sensitive data: do not commit them to version
// control (for example, add session.json to .gitignore).

2. Cookie 安全:

javascript
// Set a cookie with hardened attributes.
await page.setCookie({
  name: 'session',
  value: 'value',
  httpOnly: true, // hidden from page script, which limits theft via XSS
  secure: true, // only sent over HTTPS
  sameSite: 'Strict', // not sent on cross-site requests, which mitigates CSRF
});

3. 会话过期处理:

javascript
/**
 * Re-login when the `session_id` cookie is missing or has expired.
 *
 * A negative `expires` value (Puppeteer reports -1 for session cookies,
 * which have no fixed expiry) is treated as still valid rather than as a
 * timestamp in the past.
 *
 * @param {object} page - Puppeteer page whose cookies are inspected.
 * @returns {Promise<void>}
 */
async function checkSessionValidity(page) {
  const cookies = await page.cookies();
  const sessionCookie = cookies.find((cookie) => cookie.name === 'session_id');

  // `expires` is in seconds; Date.now() is in milliseconds.
  const hasExpired =
    sessionCookie !== undefined &&
    sessionCookie.expires >= 0 &&
    sessionCookie.expires * 1000 < Date.now();

  if (!sessionCookie || hasExpired) {
    // Session is gone or stale: log in again.
    await relogin(page);
  }
}

9. 最佳实践

1. 使用隔离上下文:

javascript
// Create one isolated context per user or session.
// NOTE(review): newer Puppeteer releases (v22+) expose this as
// browser.createBrowserContext() — confirm against the installed version.
const context = await browser.createIncognitoBrowserContext();
const page = await context.newPage();

// Close the context once the work is done.
await context.close();

2. 定期清理:

javascript
/**
 * Remove cookies whose expiry time has passed.
 *
 * Only the expired cookies are deleted; cookies without a positive `expires`
 * value (including session cookies, which Puppeteer reports with -1) are
 * left untouched.
 *
 * @param {object} page - Puppeteer page whose cookies are cleaned up.
 * @returns {Promise<void>}
 */
async function cleanupStorage(page) {
  const now = Date.now();
  const cookies = await page.cookies();

  // `expires` is in seconds; Date.now() is in milliseconds.
  const expiredCookies = cookies.filter(
    (cookie) => cookie.expires > 0 && cookie.expires * 1000 <= now,
  );

  if (expiredCookies.length > 0) {
    await page.deleteCookie(...expiredCookies);
  }
}

3. 错误处理:

javascript
// Guard the call so a rejected setCookie is reported instead of propagating.
try {
  await page.setCookie(cookie);
} catch (setCookieError) {
  console.error('Failed to set cookie:', setCookieError);
  // Handle the failure here.
}

4. 性能优化:

javascript
// Set cookies in one batched call rather than one call per cookie.
await page.setCookie(...cookies);

// Avoid frequent storage round-trips: collect everything needed in a single
// evaluate() call.
const data = await page.evaluate(() => ({
  localStorage: { ...localStorage },
  sessionStorage: { ...sessionStorage },
}));
标签:Puppeteer