// 从MongoDB的trace表同步数据到ClickHouse的events表 // // 支持以下同步模式: // 1. 增量同步:基于上次同步状态,只同步新数据(默认模式) // 2. 自定义时间范围同步:通过指定开始时间和结束时间,同步特定时间范围内的数据 // - 可以通过时间戳参数(start_time/end_time)指定范围 // - 也可以通过日期字符串参数(start_date/end_date)指定范围,支持ISO格式或yyyy-MM-dd格式 // // 使用自定义时间范围时,将不会更新同步状态,避免干扰增量同步进度 import { getVariable, setVariable } from "npm:windmill-client@1"; import { MongoClient, ObjectId } from "https://deno.land/x/mongo@v0.32.0/mod.ts"; interface MongoConfig { host: string; port: string; db: string; username: string; password: string; } interface ClickHouseConfig { clickhouse_host: string; clickhouse_port: number; clickhouse_user: string; clickhouse_password: string; clickhouse_database: string; clickhouse_url: string; } interface TraceRecord { _id: ObjectId; slugId: ObjectId; label: string | null; ip: string; type: number; platform: string; platformOS: string; browser: string; browserVersion: string; url: string; createTime: number; } // 添加 ShortRecord 接口定义 interface ShortRecord { _id: ObjectId; slug: string; // 短链接的slug部分 origin: string; // 原始URL domain?: string; // 域名 createTime: number; // 创建时间戳 user?: string; // 创建用户 title?: string; // 标题 description?: string; // 描述 tags?: string[]; // 标签 active?: boolean; // 是否活跃 expiresAt?: number; // 过期时间戳 teamId?: string; // 团队ID projectId?: string; // 项目ID } interface SyncState { last_sync_time: number; records_synced: number; last_sync_id?: string; } // 定义UTM参数接口 interface UtmParams { utm_source: string; utm_medium: string; utm_campaign: string; utm_term: string; utm_content: string; } // 同步状态键名 const SYNC_STATE_KEY = "f/shorturl_analytics/mongo_sync_state"; // 日期字符串转时间戳工具函数(接受ISO字符串或yyyy-MM-dd格式) function dateToTimestamp(dateStr: string): number { try { // 尝试直接解析完整的ISO日期字符串 const date = new Date(dateStr); // 检查是否为有效日期 if (isNaN(date.getTime())) { // 尝试解析yyyy-MM-dd格式,默认设置为当天的00:00:00 const parts = dateStr.split('-'); if (parts.length === 3) { const year = parseInt(parts[0], 10); const month = parseInt(parts[1], 10) - 1; // 月份从0开始 const day = parseInt(parts[2], 10); const dateObj = new Date(year, month, day, 0, 0, 0); return dateObj.getTime(); } throw new Error(`无法解析日期字符串: ${dateStr}`); } return date.getTime(); } catch (err) { throw new Error(`日期转换错误: ${err instanceof Error ? err.message : String(err)}`); } } // 从URL中提取UTM参数的函数,增强版 function extractUtmParams(url: string, debug = false): UtmParams { const defaultUtmParams: UtmParams = { utm_source: "", utm_medium: "", utm_campaign: "", utm_term: "", utm_content: "" }; if (!url) return defaultUtmParams; if (debug) { console.log(`[UTM提取] 原始URL: ${url}`); } // 准备一个解析后的参数对象 const params: UtmParams = { ...defaultUtmParams }; // 尝试多种方法提取UTM参数 // 方法1: 使用URL对象解析 try { // 先处理URL,确保是完整的URL格式 let normalizedUrl = url; if (!url.match(/^https?:\/\//i)) { normalizedUrl = `https://example.com${url.startsWith('/') ? '' : '/'}${url}`; } const urlObj = new URL(normalizedUrl); // 读取URL参数 if (urlObj.searchParams.has('utm_source')) params.utm_source = urlObj.searchParams.get('utm_source') || ""; if (urlObj.searchParams.has('utm_medium')) params.utm_medium = urlObj.searchParams.get('utm_medium') || ""; if (urlObj.searchParams.has('utm_campaign')) params.utm_campaign = urlObj.searchParams.get('utm_campaign') || ""; if (urlObj.searchParams.has('utm_term')) params.utm_term = urlObj.searchParams.get('utm_term') || ""; if (urlObj.searchParams.has('utm_content')) params.utm_content = urlObj.searchParams.get('utm_content') || ""; if (debug) { console.log(`[UTM提取] URL对象解析结果: ${JSON.stringify(params)}`); } // 如果至少找到一个UTM参数,则返回 if (params.utm_source || params.utm_medium || params.utm_campaign || params.utm_term || params.utm_content) { return params; } } catch (_err) { if (debug) { console.log(`[UTM提取] URL对象解析失败,尝试正则表达式`); } } // 方法2: 使用正则表达式提取参数 // 使用正则表达式(最安全的方法,适用于任何格式) const sourceMatch = url.match(/[?&]utm_source=([^&#]+)/i); if (sourceMatch && sourceMatch[1]) { try { params.utm_source = decodeURIComponent(sourceMatch[1]); } catch (_) { params.utm_source = sourceMatch[1]; } } const mediumMatch = url.match(/[?&]utm_medium=([^&#]+)/i); if (mediumMatch && mediumMatch[1]) { try { params.utm_medium = decodeURIComponent(mediumMatch[1]); } catch (_) { params.utm_medium = mediumMatch[1]; } } const campaignMatch = url.match(/[?&]utm_campaign=([^&#]+)/i); if (campaignMatch && campaignMatch[1]) { try { params.utm_campaign = decodeURIComponent(campaignMatch[1]); } catch (_) { params.utm_campaign = campaignMatch[1]; } } const termMatch = url.match(/[?&]utm_term=([^&#]+)/i); if (termMatch && termMatch[1]) { try { params.utm_term = decodeURIComponent(termMatch[1]); } catch (_) { params.utm_term = termMatch[1]; } } const contentMatch = url.match(/[?&]utm_content=([^&#]+)/i); if (contentMatch && contentMatch[1]) { try { params.utm_content = decodeURIComponent(contentMatch[1]); } catch (_) { params.utm_content = contentMatch[1]; } } if (debug) { console.log(`[UTM提取] 正则表达式解析结果: ${JSON.stringify(params)}`); } return params; } export async function main( batch_size = 1000, max_records = 9999999, timeout_minutes = 60, skip_clickhouse_check = false, force_insert = true, database_override = "shorturl_analytics", // 添加数据库名称参数,默认为shorturl_analytics reset_sync_state = false, // 添加参数用于重置同步状态 debug_utm = false, // 添加参数控制UTM调试日志输出 start_time?: number, // 添加参数指定同步的开始时间戳,可选 end_time?: number, // 添加参数指定同步的结束时间戳,可选 use_custom_time_range = false, // 添加参数控制是否使用自定义时间范围 start_date?: string, // 添加开始日期字符串参数(ISO格式或yyyy-MM-dd格式) end_date?: string // 添加结束日期字符串参数(ISO格式或yyyy-MM-dd格式) ) { const logWithTimestamp = (message: string) => { const now = new Date(); console.log(`[${now.toISOString()}] ${message}`); }; logWithTimestamp("开始执行MongoDB到ClickHouse的同步任务"); logWithTimestamp(`批处理大小: ${batch_size}, 最大记录数: ${max_records}, 超时时间: ${timeout_minutes}分钟`); // 处理日期字符串参数,转换为时间戳 if (start_date) { try { start_time = dateToTimestamp(start_date); logWithTimestamp(`将开始日期 ${start_date} 转换为时间戳 ${start_time}`); use_custom_time_range = true; } catch (err) { logWithTimestamp(`开始日期转换错误: ${err instanceof Error ? err.message : String(err)}`); } } if (end_date) { try { end_time = dateToTimestamp(end_date); // 如果是日期格式,设置为当天结束时间 (23:59:59.999) if (end_date.split('-').length === 3 && end_date.length <= 10) { end_time += 24 * 60 * 60 * 1000 - 1; // 加上23:59:59.999 logWithTimestamp(`将结束日期 ${end_date} 转换为当天结束时间戳 ${end_time}`); } else { logWithTimestamp(`将结束日期 ${end_date} 转换为时间戳 ${end_time}`); } use_custom_time_range = true; } catch (err) { logWithTimestamp(`结束日期转换错误: ${err instanceof Error ? err.message : String(err)}`); } } if (skip_clickhouse_check) { logWithTimestamp("⚠️ 警告: 已启用跳过ClickHouse检查模式,不会检查记录是否已存在"); } if (force_insert) { logWithTimestamp("⚠️ 警告: 已启用强制插入模式,将尝试插入所有记录"); } if (reset_sync_state) { logWithTimestamp("⚠️ 警告: 已启用重置同步状态,将从头开始同步数据"); } if (debug_utm) { logWithTimestamp("已启用UTM参数调试日志"); } if (use_custom_time_range) { if (start_time) { logWithTimestamp(`已启用自定义时间范围:开始时间 ${new Date(start_time).toISOString()}`); } if (end_time) { logWithTimestamp(`已启用自定义时间范围:结束时间 ${new Date(end_time).toISOString()}`); } } // 设置超时 const startTime = Date.now(); const timeoutMs = timeout_minutes * 60 * 1000; // 检查是否超时 const checkTimeout = () => { if (Date.now() - startTime > timeoutMs) { console.log(`运行时间超过${timeout_minutes}分钟,暂停执行`); return true; } return false; }; // 获取MongoDB和ClickHouse的连接信息 let mongoConfig: MongoConfig; let clickhouseConfig: ClickHouseConfig; try { const rawMongoConfig = await getVariable("f/shorturl_analytics/mongodb"); console.log("原始MongoDB配置:", JSON.stringify(rawMongoConfig)); // 尝试解析配置,如果是字符串形式 if (typeof rawMongoConfig === "string") { try { mongoConfig = JSON.parse(rawMongoConfig); } catch (e) { console.error("MongoDB配置解析失败:", e); throw e; } } else { mongoConfig = rawMongoConfig as MongoConfig; } const rawClickhouseConfig = await getVariable("f/shorturl_analytics/clickhouse"); console.log("原始ClickHouse配置:", JSON.stringify(rawClickhouseConfig)); // 尝试解析配置,如果是字符串形式 if (typeof rawClickhouseConfig === "string") { try { clickhouseConfig = JSON.parse(rawClickhouseConfig); } catch (e) { console.error("ClickHouse配置解析失败:", e); throw e; } } else { clickhouseConfig = rawClickhouseConfig as ClickHouseConfig; } // 检查并修复数据库配置 if (!clickhouseConfig.clickhouse_database || clickhouseConfig.clickhouse_database === "undefined") { logWithTimestamp(`⚠️ 警告: 数据库名称未定义或为'undefined',使用提供的默认值: ${database_override}`); clickhouseConfig.clickhouse_database = database_override; } console.log("MongoDB配置解析为:", JSON.stringify(mongoConfig)); console.log("ClickHouse配置解析为:", JSON.stringify({ ...clickhouseConfig, clickhouse_password: "****" // 隐藏密码 })); logWithTimestamp(`将使用ClickHouse数据库: ${clickhouseConfig.clickhouse_database}`); } catch (error) { console.error("获取配置失败:", error); throw error; } // 获取上次同步状态 let lastSyncState: SyncState | null = null; if (!reset_sync_state) { try { const rawSyncState = await getVariable(SYNC_STATE_KEY); if (rawSyncState) { if (typeof rawSyncState === "string") { try { lastSyncState = JSON.parse(rawSyncState); } catch (e) { logWithTimestamp(`解析上次同步状态失败: ${e}, 将从头开始同步`); } } else { lastSyncState = rawSyncState as SyncState; } } } catch (error) { logWithTimestamp(`获取上次同步状态失败: ${error}, 将从头开始同步`); } } if (lastSyncState) { logWithTimestamp(`找到上次同步状态: 最后同步时间 ${new Date(lastSyncState.last_sync_time).toISOString()}, 已同步记录数 ${lastSyncState.records_synced}`); if (lastSyncState.last_sync_id) { logWithTimestamp(`最后同步ID: ${lastSyncState.last_sync_id}`); } } else { logWithTimestamp("没有找到上次同步状态,将从头开始同步"); } // 构建MongoDB连接URL let mongoUrl = "mongodb://"; if (mongoConfig.username && mongoConfig.password) { mongoUrl += `${mongoConfig.username}:${mongoConfig.password}@`; } mongoUrl += `${mongoConfig.host}:${mongoConfig.port}/${mongoConfig.db}`; console.log(`MongoDB连接URL: ${mongoUrl.replace(/:[^:]*@/, ":****@")}`); // 连接MongoDB const client = new MongoClient(); try { await client.connect(mongoUrl); console.log("MongoDB连接成功"); const db = client.database(mongoConfig.db); const traceCollection = db.collection("trace"); // 添加对short集合的引用 const shortCollection = db.collection("short"); // 构建查询条件,根据上次同步状态获取新记录 const query: Record = { type: 1 // 只同步type为1的记录 }; // 根据时间范围参数构建查询条件 if (use_custom_time_range) { // 使用自定义时间范围 const timeQuery: Record = {}; if (start_time) { timeQuery.$gte = start_time; logWithTimestamp(`将只同步createTime >= ${start_time} (${new Date(start_time).toISOString()}) 的记录`); } if (end_time) { timeQuery.$lte = end_time; logWithTimestamp(`将只同步createTime <= ${end_time} (${new Date(end_time).toISOString()}) 的记录`); } // 只有当至少指定了一个时间限制时才添加时间查询条件 if (Object.keys(timeQuery).length > 0) { query.createTime = timeQuery; } } // 如果不使用自定义时间范围,且有上次同步状态,则只获取更新的记录 else if (lastSyncState && lastSyncState.last_sync_time) { // 使用上次同步时间作为过滤条件 query.createTime = { $gt: lastSyncState.last_sync_time }; logWithTimestamp(`将只同步createTime > ${lastSyncState.last_sync_time} (${new Date(lastSyncState.last_sync_time).toISOString()}) 的记录`); } // 计算总记录数 const totalRecords = await traceCollection.countDocuments(query); console.log(`找到 ${totalRecords} 条新记录需要同步`); // 限制此次处理的记录数量 const recordsToProcess = Math.min(totalRecords, max_records); console.log(`本次将处理 ${recordsToProcess} 条记录`); if (totalRecords === 0) { console.log("没有新记录需要同步,任务完成"); return { success: true, records_synced: 0, message: "没有新记录需要同步" }; } // 检查ClickHouse连接状态 const checkClickHouseConnection = async (): Promise => { if (skip_clickhouse_check) { logWithTimestamp("已启用跳过ClickHouse检查,不测试连接"); return true; } try { logWithTimestamp("测试ClickHouse连接..."); const clickhouseUrl = `${clickhouseConfig.clickhouse_url}`; const response = await fetch(clickhouseUrl, { method: "POST", headers: { "Content-Type": "application/x-www-form-urlencoded", "Authorization": `Basic ${btoa(`${clickhouseConfig.clickhouse_user}:${clickhouseConfig.clickhouse_password}`)}`, }, body: `SELECT 1 FROM ${clickhouseConfig.clickhouse_database}.events LIMIT 1`, // 设置5秒超时 signal: AbortSignal.timeout(5000) }); if (response.ok) { logWithTimestamp("ClickHouse连接测试成功"); return true; } else { const errorText = await response.text(); logWithTimestamp(`ClickHouse连接测试失败: ${response.status} ${errorText}`); return false; } } catch (err) { const error = err as Error; logWithTimestamp(`ClickHouse连接测试失败: ${error.message}`); return false; } }; // 在处理记录前先检查ClickHouse连接 const clickhouseConnected = await checkClickHouseConnection(); if (!clickhouseConnected && !skip_clickhouse_check) { logWithTimestamp("⚠️ ClickHouse连接测试失败,请启用skip_clickhouse_check=true参数来跳过连接检查"); throw new Error("ClickHouse连接失败,无法继续同步"); } // 处理记录的函数 const processRecords = async (records: TraceRecord[]) => { if (records.length === 0) return 0; logWithTimestamp(`开始处理批次数据,共 ${records.length} 条记录...`); // 强制使用所有记录,不检查重复 const newRecords = records; logWithTimestamp(`准备处理 ${newRecords.length} 条记录...`); // 获取链接信息 - 新增代码 const slugIds = newRecords.map(record => record.slugId); logWithTimestamp(`正在查询 ${slugIds.length} 条短链接信息...`); const shortLinks = await shortCollection.find({ _id: { $in: slugIds } }).toArray(); // 创建映射用于快速查找 - 新增代码 const shortLinksMap = new Map(shortLinks.map((link: ShortRecord) => [link._id.toString(), link])); logWithTimestamp(`获取到 ${shortLinks.length} 条短链接信息,${newRecords.length - shortLinks.length} 条数据将使用占位符`); // 准备ClickHouse插入数据 const clickhouseData = newRecords.map(record => { const eventTime = new Date(record.createTime); // 获取对应的短链接信息 - 新增代码 const shortLink = shortLinksMap.get(record.slugId.toString()) as ShortRecord | undefined; // 提取URL中的UTM参数 - 增加调试日志 if (debug_utm && record.url) { logWithTimestamp(`======== UTM参数调试 ========`); logWithTimestamp(`记录ID: ${record._id.toString()}`); logWithTimestamp(`原始URL: ${record.url}`); } const utmParams = extractUtmParams(record.url || "", debug_utm); if (debug_utm) { logWithTimestamp(`提取的UTM参数: ${JSON.stringify(utmParams)}`); logWithTimestamp(`===========================`); } // 保存提取的UTM参数和URL到event_attributes const eventAttributes = { mongo_id: record._id.toString(), url: record.url || "", ...(record.url ? { raw_url: record.url } : {}) }; // 转换MongoDB记录为ClickHouse格式,匹配ClickHouse表结构 return { // UUID将由ClickHouse自动生成 (event_id) event_time: eventTime.toISOString().replace('T', ' ').replace('Z', ''), event_type: record.type === 1 ? "visit" : "custom", event_attributes: JSON.stringify(eventAttributes), link_id: record.slugId.toString(), link_slug: shortLink?.slug || "unknown_slug", // 使用占位符 link_label: record.label || "", link_title: shortLink?.title || "unknown_title", // 使用占位符 link_original_url: shortLink?.origin || "https://unknown.url", // 使用占位符 link_attributes: JSON.stringify({ domain: shortLink?.domain || "unknown_domain" }), // 使用占位符 link_created_at: shortLink?.createTime ? new Date(shortLink.createTime).toISOString().replace('T', ' ').replace('Z', '') : eventTime.toISOString().replace('T', ' ').replace('Z', ''), link_expires_at: shortLink?.expiresAt ? new Date(shortLink.expiresAt).toISOString().replace('T', ' ').replace('Z', '') : null, link_tags: shortLink?.tags ? JSON.stringify(shortLink.tags) : "[]", user_id: shortLink?.user || "unknown_user", // 使用占位符 user_name: "unknown_user", // 使用占位符 user_email: "", user_attributes: "{}", team_id: shortLink?.teamId || "unknown_team", // 使用占位符 team_name: "unknown_team", // 使用占位符 team_attributes: "{}", project_id: shortLink?.projectId || "unknown_project", // 使用占位符 project_name: "unknown_project", // 使用占位符 project_attributes: "{}", qr_code_id: "", qr_code_name: "", qr_code_attributes: "{}", visitor_id: record._id.toString(), session_id: record._id.toString() + "-" + record.createTime, ip_address: record.ip || "0.0.0.0", // 使用占位符 country: "", city: "", device_type: record.platform || "unknown", browser: record.browser || "unknown", // 使用占位符 os: record.platformOS || "unknown", // 使用占位符 user_agent: (record.browser || "unknown") + " " + (record.browserVersion || "unknown"), // 使用占位符 referrer: record.url || "", utm_source: utmParams.utm_source || "", utm_medium: utmParams.utm_medium || "", utm_campaign: utmParams.utm_campaign || "", utm_term: utmParams.utm_term || "", utm_content: utmParams.utm_content || "", time_spent_sec: 0, is_bounce: true, is_qr_scan: false, conversion_type: "visit", conversion_value: 0, req_full_path: record.url || "" }; }); // 生成ClickHouse插入SQL const insertSQL = ` INSERT INTO ${clickhouseConfig.clickhouse_database}.events (event_time, event_type, event_attributes, link_id, link_slug, link_label, link_title, link_original_url, link_attributes, link_created_at, link_expires_at, link_tags, user_id, user_name, user_email, user_attributes, team_id, team_name, team_attributes, project_id, project_name, project_attributes, qr_code_id, qr_code_name, qr_code_attributes, visitor_id, session_id, ip_address, country, city, device_type, browser, os, user_agent, referrer, utm_source, utm_medium, utm_campaign, utm_term, utm_content, time_spent_sec, is_bounce, is_qr_scan, conversion_type, conversion_value, req_full_path) VALUES ${clickhouseData.map(record => { // 确保所有字符串值都是字符串类型,并安全处理替换 const safeReplace = (val: unknown): string => { // 确保值是字符串,如果是null或undefined则使用空字符串 const str = val === null || val === undefined ? "" : String(val); // 安全替换单引号 return str.replace(/'/g, "''"); }; return `('${record.event_time}', '${safeReplace(record.event_type)}', '${safeReplace(record.event_attributes)}', '${record.link_id}', '${safeReplace(record.link_slug)}', '${safeReplace(record.link_label)}', '${safeReplace(record.link_title)}', '${safeReplace(record.link_original_url)}', '${safeReplace(record.link_attributes)}', '${record.link_created_at}', ${record.link_expires_at === null ? 'NULL' : `'${record.link_expires_at}'`}, '${safeReplace(record.link_tags)}', '${safeReplace(record.user_id)}', '${safeReplace(record.user_name)}', '${safeReplace(record.user_email)}', '${safeReplace(record.user_attributes)}', '${safeReplace(record.team_id)}', '${safeReplace(record.team_name)}', '${safeReplace(record.team_attributes)}', '${safeReplace(record.project_id)}', '${safeReplace(record.project_name)}', '${safeReplace(record.project_attributes)}', '${safeReplace(record.qr_code_id)}', '${safeReplace(record.qr_code_name)}', '${safeReplace(record.qr_code_attributes)}', '${safeReplace(record.visitor_id)}', '${safeReplace(record.session_id)}', '${safeReplace(record.ip_address)}', '${safeReplace(record.country)}', '${safeReplace(record.city)}', '${safeReplace(record.device_type)}', '${safeReplace(record.browser)}', '${safeReplace(record.os)}', '${safeReplace(record.user_agent)}', '${safeReplace(record.referrer)}', '${safeReplace(record.utm_source)}', '${safeReplace(record.utm_medium)}', '${safeReplace(record.utm_campaign)}', '${safeReplace(record.utm_term)}', '${safeReplace(record.utm_content)}', ${record.time_spent_sec}, ${record.is_bounce}, ${record.is_qr_scan}, '${safeReplace(record.conversion_type)}', ${record.conversion_value}, '${safeReplace(record.req_full_path)}')`; }).join(", ")} `; if (insertSQL.length === 0) { console.log("没有新记录需要插入"); return 0; } // 发送请求到ClickHouse,添加20秒超时 const clickhouseUrl = `${clickhouseConfig.clickhouse_url}`; try { logWithTimestamp("发送插入请求到ClickHouse..."); const response = await fetch(clickhouseUrl, { method: "POST", headers: { "Content-Type": "application/x-www-form-urlencoded", "Authorization": `Basic ${btoa(`${clickhouseConfig.clickhouse_user}:${clickhouseConfig.clickhouse_password}`)}` }, body: insertSQL, signal: AbortSignal.timeout(20000) }); if (!response.ok) { const errorText = await response.text(); throw new Error(`ClickHouse插入错误: ${response.status} ${errorText}`); } logWithTimestamp(`成功插入 ${newRecords.length} 条记录到ClickHouse`); return newRecords.length; } catch (err) { const error = err as Error; logWithTimestamp(`向ClickHouse插入数据失败: ${error.message}`); throw error; } }; // 批量处理记录 let processedRecords = 0; let totalBatchRecords = 0; let lastSyncTime = 0; for (let page = 0; processedRecords < recordsToProcess; page++) { // 检查超时 if (checkTimeout()) { logWithTimestamp(`已处理 ${processedRecords}/${recordsToProcess} 条记录,因超时暂停执行`); break; } // 每批次都输出进度 logWithTimestamp(`开始处理第 ${page+1} 批次,已完成 ${processedRecords}/${recordsToProcess} 条记录 (${Math.round(processedRecords/recordsToProcess*100)}%)`); logWithTimestamp(`正在从MongoDB获取第 ${page+1} 批次数据...`); const records = await traceCollection.find( query, { allowDiskUse: true, sort: { createTime: 1 }, skip: page * batch_size, limit: batch_size } ).toArray(); if (records.length === 0) { logWithTimestamp("没有找到更多数据,同步结束"); break; } // 找到数据,开始处理 logWithTimestamp(`获取到 ${records.length} 条记录,开始处理...`); // 输出当前批次的部分数据信息 if (records.length > 0) { logWithTimestamp(`批次 ${page+1} 第一条记录: ID=${records[0]._id}, 时间=${new Date(records[0].createTime).toISOString()}`); if (records.length > 1) { logWithTimestamp(`批次 ${page+1} 最后一条记录: ID=${records[records.length-1]._id}, 时间=${new Date(records[records.length-1].createTime).toISOString()}`); } // 如果开启了调试,输出一些URL样本 if (debug_utm) { const sampleSize = Math.min(5, records.length); logWithTimestamp(`URL样本 (前${sampleSize}条):`); for (let i = 0; i < sampleSize; i++) { if (records[i].url) { logWithTimestamp(`样本 ${i+1}: ${records[i].url}`); } } } } const batchSize = await processRecords(records); processedRecords += records.length; totalBatchRecords += batchSize; // 更新最后处理的记录时间和ID if (records.length > 0) { const lastRecord = records[records.length - 1]; lastSyncTime = Math.max(lastSyncTime, lastRecord.createTime); } logWithTimestamp(`第 ${page+1} 批次处理完成。已处理 ${processedRecords}/${recordsToProcess} 条记录,实际插入 ${totalBatchRecords} 条 (${Math.round(processedRecords/recordsToProcess*100)}%)`); } // 更新同步状态 if (processedRecords > 0 && lastSyncTime > 0) { // 只在非自定义时间范围模式下更新同步状态 if (!use_custom_time_range) { // 创建新的同步状态,简化对象结构 const newSyncState: SyncState = { last_sync_time: lastSyncTime, records_synced: (lastSyncState ? lastSyncState.records_synced : 0) + processedRecords, // 使用处理的总记录数,而不是实际插入数 }; try { // 保存同步状态 await setVariable(SYNC_STATE_KEY, newSyncState); logWithTimestamp(`同步状态已更新: 最后同步时间 ${new Date(newSyncState.last_sync_time).toISOString()}, 累计同步记录数 ${newSyncState.records_synced}`); } catch (err) { const error = err as Error; logWithTimestamp(`更新同步状态失败: ${error.message},将继续执行`); // 不抛出错误,继续执行 } } else { logWithTimestamp("使用自定义时间范围模式,不更新全局同步状态"); } } return { success: true, records_processed: processedRecords, records_synced: totalBatchRecords, last_sync_time: lastSyncTime > 0 ? new Date(lastSyncTime).toISOString() : null, message: use_custom_time_range ? "自定义时间范围数据同步完成" : "数据同步完成", custom_time_range_used: use_custom_time_range }; } catch (err) { console.error("同步过程中发生错误:", err); return { success: false, error: err instanceof Error ? err.message : String(err), stack: err instanceof Error ? err.stack : undefined }; } finally { // 关闭MongoDB连接 await client.close(); console.log("MongoDB连接已关闭"); } }