sync & read me
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
// Sync data from MongoDB trace table to ClickHouse events table
|
||||
import { getVariable } from "npm:windmill-client@1";
|
||||
// 从MongoDB的trace表同步数据到ClickHouse的events表
|
||||
import { getVariable, setVariable } from "npm:windmill-client@1";
|
||||
import { MongoClient, ObjectId } from "https://deno.land/x/mongo@v0.32.0/mod.ts";
|
||||
|
||||
interface MongoConfig {
|
||||
@@ -15,6 +15,7 @@ interface ClickHouseConfig {
|
||||
clickhouse_port: number;
|
||||
clickhouse_user: string;
|
||||
clickhouse_password: string;
|
||||
clickhouse_database: string;
|
||||
clickhouse_url: string;
|
||||
}
|
||||
|
||||
@@ -32,25 +33,10 @@ interface TraceRecord {
|
||||
createTime: number;
|
||||
}
|
||||
|
||||
interface ShortRecord {
|
||||
_id: ObjectId;
|
||||
slug: string; // 短链接的slug部分
|
||||
origin: string; // 原始URL
|
||||
domain?: string; // 域名
|
||||
createTime: number; // 创建时间戳
|
||||
user?: string; // 创建用户
|
||||
title?: string; // 标题
|
||||
description?: string; // 描述
|
||||
tags?: string[]; // 标签
|
||||
active?: boolean; // 是否活跃
|
||||
expiresAt?: number; // 过期时间戳
|
||||
teamId?: string; // 团队ID
|
||||
projectId?: string; // 项目ID
|
||||
}
|
||||
|
||||
interface ClickHouseRow {
|
||||
event_id: string;
|
||||
event_attributes: string;
|
||||
interface SyncState {
|
||||
last_sync_time: number;
|
||||
records_synced: number;
|
||||
last_sync_id?: string;
|
||||
}
|
||||
|
||||
export async function main(
|
||||
@@ -58,90 +44,138 @@ export async function main(
|
||||
max_records = 9999999,
|
||||
timeout_minutes = 60,
|
||||
skip_clickhouse_check = false,
|
||||
force_insert = false
|
||||
force_insert = false,
|
||||
database_override = "shorturl_analytics" // 添加数据库名称参数,默认为shorturl_analytics
|
||||
) {
|
||||
const logWithTimestamp = (message: string) => {
|
||||
const now = new Date();
|
||||
console.log(`[${now.toISOString()}] ${message}`);
|
||||
};
|
||||
|
||||
logWithTimestamp("Starting sync from MongoDB to ClickHouse events table");
|
||||
logWithTimestamp(`Batch size: ${batch_size}, Max records: ${max_records}, Timeout: ${timeout_minutes} minutes`);
|
||||
logWithTimestamp("开始执行MongoDB到ClickHouse的同步任务");
|
||||
logWithTimestamp(`批处理大小: ${batch_size}, 最大记录数: ${max_records}, 超时时间: ${timeout_minutes}分钟`);
|
||||
if (skip_clickhouse_check) {
|
||||
logWithTimestamp("⚠️ 警告: 已启用跳过ClickHouse检查模式,不会检查记录是否已存在");
|
||||
}
|
||||
if (force_insert) {
|
||||
logWithTimestamp("⚠️ 警告: 已启用强制插入模式,将尝试插入所有记录");
|
||||
}
|
||||
|
||||
// Set timeout
|
||||
// 设置超时
|
||||
const startTime = Date.now();
|
||||
const timeoutMs = timeout_minutes * 60 * 1000;
|
||||
|
||||
// 检查是否超时
|
||||
const checkTimeout = () => {
|
||||
if (Date.now() - startTime > timeoutMs) {
|
||||
console.log(`Execution time exceeded ${timeout_minutes} minutes, stopping`);
|
||||
console.log(`运行时间超过${timeout_minutes}分钟,暂停执行`);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
};
|
||||
|
||||
// Get MongoDB and ClickHouse connection info
|
||||
// 获取MongoDB和ClickHouse的连接信息
|
||||
let mongoConfig: MongoConfig;
|
||||
let clickhouseConfig: ClickHouseConfig;
|
||||
|
||||
try {
|
||||
const rawMongoConfig = await getVariable("f/shorturl_analytics/mongodb");
|
||||
mongoConfig = typeof rawMongoConfig === "string" ? JSON.parse(rawMongoConfig) : rawMongoConfig;
|
||||
console.log("原始MongoDB配置:", JSON.stringify(rawMongoConfig));
|
||||
|
||||
// 尝试解析配置,如果是字符串形式
|
||||
if (typeof rawMongoConfig === "string") {
|
||||
try {
|
||||
mongoConfig = JSON.parse(rawMongoConfig);
|
||||
} catch (e) {
|
||||
console.error("MongoDB配置解析失败:", e);
|
||||
throw e;
|
||||
}
|
||||
} else {
|
||||
mongoConfig = rawMongoConfig as MongoConfig;
|
||||
}
|
||||
|
||||
const rawClickhouseConfig = await getVariable("f/shorturl_analytics/clickhouse");
|
||||
clickhouseConfig = typeof rawClickhouseConfig === "string" ? JSON.parse(rawClickhouseConfig) : rawClickhouseConfig;
|
||||
console.log("原始ClickHouse配置:", JSON.stringify(rawClickhouseConfig));
|
||||
|
||||
// 尝试解析配置,如果是字符串形式
|
||||
if (typeof rawClickhouseConfig === "string") {
|
||||
try {
|
||||
clickhouseConfig = JSON.parse(rawClickhouseConfig);
|
||||
} catch (e) {
|
||||
console.error("ClickHouse配置解析失败:", e);
|
||||
throw e;
|
||||
}
|
||||
} else {
|
||||
clickhouseConfig = rawClickhouseConfig as ClickHouseConfig;
|
||||
}
|
||||
|
||||
// 检查并修复数据库配置
|
||||
if (!clickhouseConfig.clickhouse_database || clickhouseConfig.clickhouse_database === "undefined") {
|
||||
logWithTimestamp(`⚠️ 警告: 数据库名称未定义或为'undefined',使用提供的默认值: ${database_override}`);
|
||||
clickhouseConfig.clickhouse_database = database_override;
|
||||
}
|
||||
|
||||
console.log("MongoDB配置解析为:", JSON.stringify(mongoConfig));
|
||||
console.log("ClickHouse配置解析为:", JSON.stringify({
|
||||
...clickhouseConfig,
|
||||
clickhouse_password: "****" // 隐藏密码
|
||||
}));
|
||||
|
||||
logWithTimestamp(`将使用ClickHouse数据库: ${clickhouseConfig.clickhouse_database}`);
|
||||
} catch (error) {
|
||||
console.error("Failed to get config:", error);
|
||||
console.error("获取配置失败:", error);
|
||||
throw error;
|
||||
}
|
||||
|
||||
// Build MongoDB connection URL
|
||||
// 构建MongoDB连接URL
|
||||
let mongoUrl = "mongodb://";
|
||||
if (mongoConfig.username && mongoConfig.password) {
|
||||
mongoUrl += `${mongoConfig.username}:${mongoConfig.password}@`;
|
||||
}
|
||||
mongoUrl += `${mongoConfig.host}:${mongoConfig.port}/${mongoConfig.db}`;
|
||||
|
||||
// Connect to MongoDB
|
||||
console.log(`MongoDB连接URL: ${mongoUrl.replace(/:[^:]*@/, ":****@")}`);
|
||||
|
||||
// 连接MongoDB
|
||||
const client = new MongoClient();
|
||||
try {
|
||||
await client.connect(mongoUrl);
|
||||
console.log("MongoDB connected successfully");
|
||||
console.log("MongoDB连接成功");
|
||||
|
||||
const db = client.database(mongoConfig.db);
|
||||
const traceCollection = db.collection<TraceRecord>("trace");
|
||||
const shortCollection = db.collection<ShortRecord>("short");
|
||||
|
||||
// Build query conditions
|
||||
// 构建查询条件,获取所有记录
|
||||
const query: Record<string, unknown> = {
|
||||
type: 1 // Only sync records with type 1
|
||||
type: 1 // 只同步type为1的记录
|
||||
};
|
||||
|
||||
// Count total records
|
||||
// 计算总记录数
|
||||
const totalRecords = await traceCollection.countDocuments(query);
|
||||
console.log(`Found ${totalRecords} records to sync`);
|
||||
console.log(`找到 ${totalRecords} 条记录需要同步`);
|
||||
|
||||
// 限制此次处理的记录数量
|
||||
const recordsToProcess = Math.min(totalRecords, max_records);
|
||||
console.log(`Will process ${recordsToProcess} records`);
|
||||
console.log(`本次将处理 ${recordsToProcess} 条记录`);
|
||||
|
||||
if (totalRecords === 0) {
|
||||
console.log("No records to sync, task completed");
|
||||
console.log("没有记录需要同步,任务完成");
|
||||
return {
|
||||
success: true,
|
||||
records_synced: 0,
|
||||
message: "No records to sync"
|
||||
message: "没有记录需要同步"
|
||||
};
|
||||
}
|
||||
|
||||
// Check ClickHouse connection
|
||||
// 检查ClickHouse连接状态
|
||||
const checkClickHouseConnection = async (): Promise<boolean> => {
|
||||
if (skip_clickhouse_check) {
|
||||
logWithTimestamp("Skipping ClickHouse connection check");
|
||||
logWithTimestamp("已启用跳过ClickHouse检查,不测试连接");
|
||||
return true;
|
||||
}
|
||||
|
||||
try {
|
||||
logWithTimestamp("Testing ClickHouse connection...");
|
||||
logWithTimestamp("测试ClickHouse连接...");
|
||||
const clickhouseUrl = `${clickhouseConfig.clickhouse_url}`;
|
||||
const response = await fetch(clickhouseUrl, {
|
||||
method: "POST",
|
||||
@@ -149,45 +183,61 @@ export async function main(
|
||||
"Content-Type": "application/x-www-form-urlencoded",
|
||||
"Authorization": `Basic ${btoa(`${clickhouseConfig.clickhouse_user}:${clickhouseConfig.clickhouse_password}`)}`,
|
||||
},
|
||||
body: "SELECT 1",
|
||||
body: `SELECT 1 FROM ${clickhouseConfig.clickhouse_database}.events LIMIT 1`,
|
||||
// 设置5秒超时
|
||||
signal: AbortSignal.timeout(5000)
|
||||
});
|
||||
|
||||
if (response.ok) {
|
||||
logWithTimestamp("ClickHouse connection test successful");
|
||||
logWithTimestamp("ClickHouse连接测试成功");
|
||||
return true;
|
||||
} else {
|
||||
const errorText = await response.text();
|
||||
logWithTimestamp(`ClickHouse connection test failed: ${response.status} ${errorText}`);
|
||||
logWithTimestamp(`ClickHouse连接测试失败: ${response.status} ${errorText}`);
|
||||
return false;
|
||||
}
|
||||
} catch (err) {
|
||||
logWithTimestamp(`ClickHouse connection test failed: ${(err as Error).message}`);
|
||||
const error = err as Error;
|
||||
logWithTimestamp(`ClickHouse连接测试失败: ${error.message}`);
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
// Check if records exist in ClickHouse
|
||||
// 检查记录是否已经存在于ClickHouse中
|
||||
const checkExistingRecords = async (records: TraceRecord[]): Promise<TraceRecord[]> => {
|
||||
if (records.length === 0) return [];
|
||||
|
||||
// 如果跳过ClickHouse检查或强制插入,则直接返回所有记录
|
||||
if (skip_clickhouse_check || force_insert) {
|
||||
logWithTimestamp(`Skipping ClickHouse duplicate check, will process all ${records.length} records`);
|
||||
logWithTimestamp(`已跳过ClickHouse重复检查,准备处理所有 ${records.length} 条记录`);
|
||||
return records;
|
||||
}
|
||||
|
||||
logWithTimestamp(`正在检查 ${records.length} 条记录是否已存在于ClickHouse中...`);
|
||||
|
||||
try {
|
||||
const recordIds = records.map(record => record._id.toString());
|
||||
// 验证数据库名称
|
||||
if (!clickhouseConfig.clickhouse_database || clickhouseConfig.clickhouse_database === "undefined") {
|
||||
throw new Error("数据库名称未定义或无效,请检查配置");
|
||||
}
|
||||
|
||||
// 提取所有记录的ID
|
||||
const recordIds = records.map(record => record.slugId.toString()); // 使用slugId作为link_id查询
|
||||
logWithTimestamp(`待检查的记录ID: ${recordIds.join(', ')}`);
|
||||
|
||||
// 构建查询SQL,检查记录是否已存在,确保添加FORMAT JSON来获取正确的JSON格式响应
|
||||
const query = `
|
||||
SELECT event_id
|
||||
FROM shorturl_analytics.events
|
||||
WHERE event_attributes LIKE '%"mongo_id":"%'
|
||||
AND event_attributes LIKE ANY ('%${recordIds.join("%' OR '%")}%')
|
||||
SELECT link_id, visitor_id
|
||||
FROM ${clickhouseConfig.clickhouse_database}.events
|
||||
WHERE link_id IN ('${recordIds.join("','")}')
|
||||
FORMAT JSON
|
||||
`;
|
||||
|
||||
const response = await fetch(clickhouseConfig.clickhouse_url, {
|
||||
logWithTimestamp(`执行ClickHouse查询: ${query.replace(/\n\s*/g, ' ')}`);
|
||||
|
||||
// 发送请求到ClickHouse,添加10秒超时
|
||||
const clickhouseUrl = `${clickhouseConfig.clickhouse_url}`;
|
||||
const response = await fetch(clickhouseUrl, {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/x-www-form-urlencoded",
|
||||
@@ -199,134 +249,195 @@ export async function main(
|
||||
|
||||
if (!response.ok) {
|
||||
const errorText = await response.text();
|
||||
throw new Error(`ClickHouse query error: ${response.status} ${errorText}`);
|
||||
throw new Error(`ClickHouse查询错误: ${response.status} ${errorText}`);
|
||||
}
|
||||
|
||||
const result = await response.json();
|
||||
const existingIds = new Set(result.data.map((row: ClickHouseRow) => {
|
||||
const matches = row.event_attributes.match(/"mongo_id":"([^"]+)"/);
|
||||
return matches ? matches[1] : null;
|
||||
}).filter(Boolean));
|
||||
// 获取响应文本以便记录
|
||||
const responseText = await response.text();
|
||||
logWithTimestamp(`ClickHouse查询响应: ${responseText.slice(0, 200)}${responseText.length > 200 ? '...' : ''}`);
|
||||
|
||||
return records.filter(record => !existingIds.has(record._id.toString()));
|
||||
if (!responseText.trim()) {
|
||||
logWithTimestamp("ClickHouse返回空响应,假定没有记录存在");
|
||||
return records; // 如果响应为空,假设没有记录
|
||||
}
|
||||
|
||||
// 解析结果
|
||||
let result;
|
||||
try {
|
||||
result = JSON.parse(responseText);
|
||||
} catch (err) {
|
||||
logWithTimestamp(`ClickHouse响应不是有效的JSON: ${responseText}`);
|
||||
throw new Error(`解析ClickHouse响应失败: ${(err as Error).message}`);
|
||||
}
|
||||
|
||||
// 确保result有正确的结构
|
||||
if (!result.data) {
|
||||
logWithTimestamp(`ClickHouse响应缺少data字段: ${JSON.stringify(result)}`);
|
||||
return records; // 如果没有data字段,假设没有记录
|
||||
}
|
||||
|
||||
// 提取已存在的记录ID
|
||||
const existingIds = new Set(result.data.map((row: { link_id: string }) => row.link_id));
|
||||
|
||||
logWithTimestamp(`检测到 ${existingIds.size} 条记录已存在于ClickHouse中`);
|
||||
if (existingIds.size > 0) {
|
||||
logWithTimestamp(`已存在的记录ID: ${Array.from(existingIds).join(', ')}`);
|
||||
}
|
||||
|
||||
// 过滤出不存在的记录
|
||||
const newRecords = records.filter(record => !existingIds.has(record.slugId.toString())); // 使用slugId匹配link_id
|
||||
logWithTimestamp(`过滤后剩余 ${newRecords.length} 条新记录需要插入`);
|
||||
|
||||
return newRecords;
|
||||
} catch (err) {
|
||||
logWithTimestamp(`Error checking existing records: ${(err as Error).message}`);
|
||||
return skip_clickhouse_check ? records : [];
|
||||
const error = err as Error;
|
||||
logWithTimestamp(`ClickHouse查询出错: ${error.message}`);
|
||||
if (skip_clickhouse_check) {
|
||||
logWithTimestamp("已启用跳过ClickHouse检查,将继续处理所有记录");
|
||||
return records;
|
||||
} else {
|
||||
throw error; // 如果没有启用跳过检查,则抛出错误
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Process records function
|
||||
// 在处理记录前先检查ClickHouse连接
|
||||
const clickhouseConnected = await checkClickHouseConnection();
|
||||
if (!clickhouseConnected && !skip_clickhouse_check) {
|
||||
logWithTimestamp("⚠️ ClickHouse连接测试失败,请启用skip_clickhouse_check=true参数来跳过连接检查");
|
||||
throw new Error("ClickHouse连接失败,无法继续同步");
|
||||
}
|
||||
|
||||
// 处理记录的函数
|
||||
const processRecords = async (records: TraceRecord[]) => {
|
||||
if (records.length === 0) return 0;
|
||||
|
||||
const newRecords = await checkExistingRecords(records);
|
||||
logWithTimestamp(`开始处理批次数据,共 ${records.length} 条记录...`);
|
||||
|
||||
// 检查记录是否已存在
|
||||
let newRecords;
|
||||
try {
|
||||
newRecords = await checkExistingRecords(records);
|
||||
} catch (err) {
|
||||
const error = err as Error;
|
||||
logWithTimestamp(`检查记录是否存在时出错: ${error.message}`);
|
||||
if (!skip_clickhouse_check && !force_insert) {
|
||||
throw error;
|
||||
}
|
||||
// 如果跳过检查或强制插入,则使用所有记录
|
||||
logWithTimestamp("将使用所有记录进行处理");
|
||||
newRecords = records;
|
||||
}
|
||||
|
||||
if (newRecords.length === 0) {
|
||||
logWithTimestamp("All records already exist, skipping");
|
||||
logWithTimestamp("所有记录都已存在,跳过处理");
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Get link information for all records
|
||||
const slugIds = newRecords.map(record => record.slugId);
|
||||
const shortLinks = await shortCollection.find({
|
||||
_id: { $in: slugIds }
|
||||
}).toArray();
|
||||
|
||||
// Create a map for quick lookup
|
||||
const shortLinksMap = new Map(shortLinks.map(link => [link._id.toString(), link]));
|
||||
logWithTimestamp(`准备处理 ${newRecords.length} 条新记录...`);
|
||||
|
||||
// Prepare ClickHouse insert data
|
||||
// 准备ClickHouse插入数据
|
||||
const clickhouseData = newRecords.map(record => {
|
||||
const shortLink = shortLinksMap.get(record.slugId.toString());
|
||||
|
||||
// 将毫秒时间戳转换为 DateTime64(3) 格式
|
||||
const formatDateTime = (timestamp: number) => {
|
||||
return new Date(timestamp).toISOString().replace('T', ' ').replace('Z', '');
|
||||
};
|
||||
|
||||
const eventTime = new Date(record.createTime);
|
||||
// 转换MongoDB记录为ClickHouse格式,匹配ClickHouse表结构
|
||||
return {
|
||||
// Event base information
|
||||
event_id: record._id.toString(),
|
||||
event_time: formatDateTime(record.createTime),
|
||||
event_type: "click",
|
||||
event_attributes: JSON.stringify({
|
||||
original_type: record.type
|
||||
}),
|
||||
|
||||
// Link information from short collection
|
||||
// UUID将由ClickHouse自动生成 (event_id)
|
||||
event_time: eventTime.toISOString().replace('T', ' ').replace('Z', ''),
|
||||
event_type: record.type === 1 ? "visit" : "custom",
|
||||
event_attributes: `{"mongo_id":"${record._id.toString()}"}`,
|
||||
link_id: record.slugId.toString(),
|
||||
link_slug: shortLink?.slug || "",
|
||||
link_slug: "", // 这些字段可能需要从其他表获取
|
||||
link_label: record.label || "",
|
||||
link_title: "",
|
||||
link_original_url: shortLink?.origin || "",
|
||||
link_attributes: JSON.stringify({
|
||||
domain: shortLink?.domain || null
|
||||
}),
|
||||
link_created_at: shortLink?.createTime ? formatDateTime(shortLink.createTime) : formatDateTime(record.createTime),
|
||||
link_expires_at: shortLink?.expiresAt ? formatDateTime(shortLink.expiresAt) : null,
|
||||
link_tags: "[]", // Empty array as default
|
||||
|
||||
// User information
|
||||
user_id: shortLink?.user || "",
|
||||
link_original_url: "",
|
||||
link_attributes: "{}",
|
||||
link_created_at: eventTime.toISOString().replace('T', ' ').replace('Z', ''), // 暂用访问时间代替,可能需要从其他表获取
|
||||
link_expires_at: null,
|
||||
link_tags: "[]",
|
||||
user_id: "",
|
||||
user_name: "",
|
||||
user_email: "",
|
||||
user_attributes: "{}",
|
||||
|
||||
// Team information
|
||||
team_id: shortLink?.teamId || "",
|
||||
team_id: "",
|
||||
team_name: "",
|
||||
team_attributes: "{}",
|
||||
|
||||
// Project information
|
||||
project_id: shortLink?.projectId || "",
|
||||
project_id: "",
|
||||
project_name: "",
|
||||
project_attributes: "{}",
|
||||
|
||||
// QR code information
|
||||
qr_code_id: "",
|
||||
qr_code_name: "",
|
||||
qr_code_attributes: "{}",
|
||||
|
||||
// Visitor information
|
||||
visitor_id: "", // Empty string as default
|
||||
session_id: `${record.slugId.toString()}-${record.createTime}`,
|
||||
ip_address: record.ip || "",
|
||||
country: "",
|
||||
visitor_id: record._id.toString(), // 使用MongoDB ID作为访客ID
|
||||
session_id: record._id.toString() + "-" + record.createTime, // 创建一个唯一会话ID
|
||||
ip_address: record.ip,
|
||||
country: "", // 这些字段在MongoDB中不存在,使用默认值
|
||||
city: "",
|
||||
device_type: record.platform || "",
|
||||
device_type: record.platform || "unknown",
|
||||
browser: record.browser || "",
|
||||
os: record.platformOS || "",
|
||||
user_agent: `${record.browser || ""} ${record.browserVersion || ""}`.trim(),
|
||||
|
||||
// Source information
|
||||
user_agent: record.browser + " " + record.browserVersion,
|
||||
referrer: record.url || "",
|
||||
utm_source: "",
|
||||
utm_medium: "",
|
||||
utm_campaign: "",
|
||||
|
||||
// Interaction information
|
||||
utm_term: "",
|
||||
utm_content: "",
|
||||
time_spent_sec: 0,
|
||||
is_bounce: true,
|
||||
is_qr_scan: false,
|
||||
conversion_type: "visit",
|
||||
conversion_value: 0
|
||||
conversion_value: 0,
|
||||
req_full_path: record.url || ""
|
||||
};
|
||||
});
|
||||
|
||||
// Generate ClickHouse insert SQL
|
||||
const rows = clickhouseData.map(row => {
|
||||
// 只需要处理JSON字符串的转义
|
||||
const formattedRow = {
|
||||
...row,
|
||||
event_attributes: row.event_attributes.replace(/\\/g, '\\\\'),
|
||||
link_attributes: row.link_attributes.replace(/\\/g, '\\\\')
|
||||
};
|
||||
return JSON.stringify(formattedRow);
|
||||
}).join('\n');
|
||||
// 生成ClickHouse插入SQL
|
||||
const insertSQL = `
|
||||
INSERT INTO ${clickhouseConfig.clickhouse_database}.events
|
||||
(event_time, event_type, event_attributes, link_id, link_slug, link_label, link_title,
|
||||
link_original_url, link_attributes, link_created_at, link_expires_at, link_tags,
|
||||
user_id, user_name, user_email, user_attributes, team_id, team_name, team_attributes,
|
||||
project_id, project_name, project_attributes, qr_code_id, qr_code_name, qr_code_attributes,
|
||||
visitor_id, session_id, ip_address, country, city, device_type, browser, os, user_agent,
|
||||
referrer, utm_source, utm_medium, utm_campaign, utm_term, utm_content, time_spent_sec,
|
||||
is_bounce, is_qr_scan, conversion_type, conversion_value, req_full_path)
|
||||
VALUES ${clickhouseData.map(record => {
|
||||
// 确保所有字符串值都是字符串类型,并安全处理替换
|
||||
const safeReplace = (val: unknown): string => {
|
||||
// 确保值是字符串,如果是null或undefined则使用空字符串
|
||||
const str = val === null || val === undefined ? "" : String(val);
|
||||
// 安全替换单引号
|
||||
return str.replace(/'/g, "''");
|
||||
};
|
||||
|
||||
return `('${record.event_time}', '${safeReplace(record.event_type)}', '${safeReplace(record.event_attributes)}',
|
||||
'${record.link_id}', '${safeReplace(record.link_slug)}', '${safeReplace(record.link_label)}', '${safeReplace(record.link_title)}',
|
||||
'${safeReplace(record.link_original_url)}', '${safeReplace(record.link_attributes)}', '${record.link_created_at}',
|
||||
${record.link_expires_at === null ? 'NULL' : `'${record.link_expires_at}'`}, '${safeReplace(record.link_tags)}',
|
||||
'${safeReplace(record.user_id)}', '${safeReplace(record.user_name)}', '${safeReplace(record.user_email)}',
|
||||
'${safeReplace(record.user_attributes)}', '${safeReplace(record.team_id)}', '${safeReplace(record.team_name)}',
|
||||
'${safeReplace(record.team_attributes)}', '${safeReplace(record.project_id)}', '${safeReplace(record.project_name)}',
|
||||
'${safeReplace(record.project_attributes)}', '${safeReplace(record.qr_code_id)}', '${safeReplace(record.qr_code_name)}',
|
||||
'${safeReplace(record.qr_code_attributes)}', '${safeReplace(record.visitor_id)}', '${safeReplace(record.session_id)}',
|
||||
'${safeReplace(record.ip_address)}', '${safeReplace(record.country)}', '${safeReplace(record.city)}',
|
||||
'${safeReplace(record.device_type)}', '${safeReplace(record.browser)}', '${safeReplace(record.os)}',
|
||||
'${safeReplace(record.user_agent)}', '${safeReplace(record.referrer)}', '${safeReplace(record.utm_source)}',
|
||||
'${safeReplace(record.utm_medium)}', '${safeReplace(record.utm_campaign)}', '${safeReplace(record.utm_term)}',
|
||||
'${safeReplace(record.utm_content)}', ${record.time_spent_sec}, ${record.is_bounce}, ${record.is_qr_scan},
|
||||
'${safeReplace(record.conversion_type)}', ${record.conversion_value}, '${safeReplace(record.req_full_path)}')`;
|
||||
}).join(", ")}
|
||||
`;
|
||||
|
||||
const insertSQL = `INSERT INTO shorturl_analytics.events FORMAT JSONEachRow\n${rows}`;
|
||||
if (insertSQL.length === 0) {
|
||||
console.log("没有新记录需要插入");
|
||||
return 0;
|
||||
}
|
||||
|
||||
// 发送请求到ClickHouse,添加20秒超时
|
||||
const clickhouseUrl = `${clickhouseConfig.clickhouse_url}`;
|
||||
try {
|
||||
const response = await fetch(clickhouseConfig.clickhouse_url, {
|
||||
logWithTimestamp("发送插入请求到ClickHouse...");
|
||||
const response = await fetch(clickhouseUrl, {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/x-www-form-urlencoded",
|
||||
@@ -338,35 +449,33 @@ export async function main(
|
||||
|
||||
if (!response.ok) {
|
||||
const errorText = await response.text();
|
||||
throw new Error(`ClickHouse insert error: ${response.status} ${errorText}`);
|
||||
throw new Error(`ClickHouse插入错误: ${response.status} ${errorText}`);
|
||||
}
|
||||
|
||||
logWithTimestamp(`Successfully inserted ${newRecords.length} records to ClickHouse`);
|
||||
logWithTimestamp(`成功插入 ${newRecords.length} 条记录到ClickHouse`);
|
||||
return newRecords.length;
|
||||
} catch (err) {
|
||||
logWithTimestamp(`Failed to insert data to ClickHouse: ${(err as Error).message}`);
|
||||
throw err;
|
||||
const error = err as Error;
|
||||
logWithTimestamp(`向ClickHouse插入数据失败: ${error.message}`);
|
||||
throw error;
|
||||
}
|
||||
};
|
||||
|
||||
// Check ClickHouse connection before processing
|
||||
const clickhouseConnected = await checkClickHouseConnection();
|
||||
if (!clickhouseConnected && !skip_clickhouse_check) {
|
||||
throw new Error("ClickHouse connection failed, cannot continue sync");
|
||||
}
|
||||
|
||||
// Process records in batches
|
||||
// 批量处理记录
|
||||
let processedRecords = 0;
|
||||
let totalBatchRecords = 0;
|
||||
|
||||
for (let page = 0; processedRecords < recordsToProcess; page++) {
|
||||
// 检查超时
|
||||
if (checkTimeout()) {
|
||||
logWithTimestamp(`Processed ${processedRecords}/${recordsToProcess} records, stopping due to timeout`);
|
||||
logWithTimestamp(`已处理 ${processedRecords}/${recordsToProcess} 条记录,因超时暂停执行`);
|
||||
break;
|
||||
}
|
||||
|
||||
logWithTimestamp(`Processing batch ${page+1}, completed ${processedRecords}/${recordsToProcess} records (${Math.round(processedRecords/recordsToProcess*100)}%)`);
|
||||
// 每批次都输出进度
|
||||
logWithTimestamp(`开始处理第 ${page+1} 批次,已完成 ${processedRecords}/${recordsToProcess} 条记录 (${Math.round(processedRecords/recordsToProcess*100)}%)`);
|
||||
|
||||
logWithTimestamp(`正在从MongoDB获取第 ${page+1} 批次数据...`);
|
||||
const records = await traceCollection.find(
|
||||
query,
|
||||
{
|
||||
@@ -378,32 +487,43 @@ export async function main(
|
||||
).toArray();
|
||||
|
||||
if (records.length === 0) {
|
||||
logWithTimestamp("No more records found, sync complete");
|
||||
logWithTimestamp("没有找到更多数据,同步结束");
|
||||
break;
|
||||
}
|
||||
|
||||
// 找到数据,开始处理
|
||||
logWithTimestamp(`获取到 ${records.length} 条记录,开始处理...`);
|
||||
// 输出当前批次的部分数据信息
|
||||
if (records.length > 0) {
|
||||
logWithTimestamp(`批次 ${page+1} 第一条记录: ID=${records[0]._id}, 时间=${new Date(records[0].createTime).toISOString()}`);
|
||||
if (records.length > 1) {
|
||||
logWithTimestamp(`批次 ${page+1} 最后一条记录: ID=${records[records.length-1]._id}, 时间=${new Date(records[records.length-1].createTime).toISOString()}`);
|
||||
}
|
||||
}
|
||||
|
||||
const batchSize = await processRecords(records);
|
||||
processedRecords += records.length;
|
||||
totalBatchRecords += batchSize;
|
||||
|
||||
logWithTimestamp(`Batch ${page+1} complete. Processed ${processedRecords}/${recordsToProcess} records, inserted ${totalBatchRecords} (${Math.round(processedRecords/recordsToProcess*100)}%)`);
|
||||
logWithTimestamp(`第 ${page+1} 批次处理完成。已处理 ${processedRecords}/${recordsToProcess} 条记录,实际插入 ${totalBatchRecords} 条 (${Math.round(processedRecords/recordsToProcess*100)}%)`);
|
||||
}
|
||||
|
||||
return {
|
||||
success: true,
|
||||
records_processed: processedRecords,
|
||||
records_synced: totalBatchRecords,
|
||||
message: "Data sync completed"
|
||||
message: "数据同步完成"
|
||||
};
|
||||
} catch (err) {
|
||||
console.error("Error during sync:", err);
|
||||
console.error("同步过程中发生错误:", err);
|
||||
return {
|
||||
success: false,
|
||||
error: err instanceof Error ? err.message : String(err),
|
||||
stack: err instanceof Error ? err.stack : undefined
|
||||
};
|
||||
} finally {
|
||||
// 关闭MongoDB连接
|
||||
await client.close();
|
||||
console.log("MongoDB connection closed");
|
||||
console.log("MongoDB连接已关闭");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user