Only sync MongoDB trace records with type 1
This commit is contained in:
@@ -42,6 +42,7 @@ interface SyncState {
|
|||||||
last_sync_time: number;
|
last_sync_time: number;
|
||||||
records_synced: number;
|
records_synced: number;
|
||||||
last_sync_id?: string;
|
last_sync_id?: string;
|
||||||
|
sync_filter?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
// 替代 Windmill 的变量存储函数
|
// 替代 Windmill 的变量存储函数
|
||||||
@@ -162,7 +163,10 @@ export async function main(
|
|||||||
const traceCollection = db.collection<TraceRecord>("trace");
|
const traceCollection = db.collection<TraceRecord>("trace");
|
||||||
|
|
||||||
// 构建查询条件,只查询新的记录
|
// 构建查询条件,只查询新的记录
|
||||||
const query: Record<string, unknown> = {};
|
const query: Record<string, unknown> = {
|
||||||
|
// 添加type为1的过滤条件
|
||||||
|
type: 1
|
||||||
|
};
|
||||||
|
|
||||||
if (syncState.last_sync_time > 0) {
|
if (syncState.last_sync_time > 0) {
|
||||||
query.createTime = { $gt: syncState.last_sync_time };
|
query.createTime = { $gt: syncState.last_sync_time };
|
||||||
@@ -170,7 +174,6 @@ export async function main(
|
|||||||
|
|
||||||
if (syncState.last_sync_id) {
|
if (syncState.last_sync_id) {
|
||||||
// 如果有上次同步的ID,则从该ID之后开始查询
|
// 如果有上次同步的ID,则从该ID之后开始查询
|
||||||
// 注意:这需要MongoDB中createTime相同的记录按_id排序
|
|
||||||
query._id = { $gt: new ObjectId(syncState.last_sync_id) };
|
query._id = { $gt: new ObjectId(syncState.last_sync_id) };
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -196,7 +199,7 @@ export async function main(
|
|||||||
let processedRecords = 0;
|
let processedRecords = 0;
|
||||||
let lastId: string | undefined;
|
let lastId: string | undefined;
|
||||||
let lastCreateTime = syncState.last_sync_time;
|
let lastCreateTime = syncState.last_sync_time;
|
||||||
let totalBatchRecords = 0;
|
const totalBatchRecords = 0;
|
||||||
|
|
||||||
// 检查ClickHouse连接状态
|
// 检查ClickHouse连接状态
|
||||||
const checkClickHouseConnection = async (): Promise<boolean> => {
|
const checkClickHouseConnection = async (): Promise<boolean> => {
|
||||||
@@ -436,140 +439,63 @@ export async function main(
|
|||||||
body: insertSQL,
|
body: insertSQL,
|
||||||
signal: AbortSignal.timeout(20000)
|
signal: AbortSignal.timeout(20000)
|
||||||
});
|
});
|
||||||
|
|
||||||
if (!response.ok) {
|
if (response.ok) {
|
||||||
|
logWithTimestamp("ClickHouse插入响应: 成功");
|
||||||
|
return newRecords.length;
|
||||||
|
} else {
|
||||||
const errorText = await response.text();
|
const errorText = await response.text();
|
||||||
throw new Error(`ClickHouse插入错误: ${response.status} ${errorText}`);
|
throw new Error(`ClickHouse插入失败: ${response.status} ${errorText}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
logWithTimestamp(`成功插入 ${newRecords.length} 条记录到ClickHouse`);
|
|
||||||
return newRecords.length;
|
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
const error = err as Error;
|
const error = err as Error;
|
||||||
logWithTimestamp(`向ClickHouse插入数据失败: ${error.message}`);
|
logWithTimestamp(`ClickHouse插入请求出错: ${error.message}`);
|
||||||
throw error;
|
throw error;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// 批量处理记录
|
// 处理所有记录
|
||||||
for (let page = 0; processedRecords < recordsToProcess; page++) {
|
let totalSyncedRecords = 0;
|
||||||
// 检查超时
|
while (processedRecords < recordsToProcess) {
|
||||||
if (checkTimeout()) {
|
const batch = await traceCollection.find(query).skip(processedRecords).limit(batch_size).toArray();
|
||||||
logWithTimestamp(`已处理 ${processedRecords}/${recordsToProcess} 条记录,因超时暂停执行`);
|
const batchRecords = await processRecords(batch);
|
||||||
break;
|
processedRecords += batch.length;
|
||||||
}
|
totalSyncedRecords += batchRecords;
|
||||||
|
logWithTimestamp(`已处理 ${processedRecords} 条记录,共 ${totalSyncedRecords} 条记录已同步`);
|
||||||
|
|
||||||
// 每批次都输出进度
|
// 更新同步状态
|
||||||
logWithTimestamp(`开始处理第 ${page+1} 批次,已完成 ${processedRecords}/${recordsToProcess} 条记录 (${Math.round(processedRecords/recordsToProcess*100)}%)`);
|
const newSyncState: SyncState = {
|
||||||
|
last_sync_time: lastCreateTime,
|
||||||
logWithTimestamp(`正在从MongoDB获取第 ${page+1} 批次数据...`);
|
records_synced: syncState.records_synced + totalBatchRecords,
|
||||||
const records = await traceCollection.find(query)
|
last_sync_id: lastId,
|
||||||
.sort({ createTime: 1, _id: 1 })
|
// 可以添加一个标记,表明这是type=1的过滤同步
|
||||||
.skip(page * batch_size)
|
sync_filter: "type_1_only"
|
||||||
.limit(batch_size)
|
};
|
||||||
.toArray();
|
|
||||||
|
|
||||||
if (records.length === 0) {
|
// 保存同步状态
|
||||||
logWithTimestamp(`第 ${page+1} 批次没有找到数据,结束处理`);
|
await setVariable("f/shorturl_analytics/clickhouse/shorturl_sync_state", JSON.stringify(newSyncState));
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
logWithTimestamp(`获取到 ${records.length} 条记录,开始处理...`);
|
|
||||||
// 输出当前批次的部分数据信息
|
|
||||||
if (records.length > 0) {
|
|
||||||
logWithTimestamp(`批次 ${page+1} 第一条记录: ID=${records[0]._id}, 时间=${new Date(records[0].createTime).toISOString()}`);
|
|
||||||
if (records.length > 1) {
|
|
||||||
logWithTimestamp(`批次 ${page+1} 最后一条记录: ID=${records[records.length-1]._id}, 时间=${new Date(records[records.length-1].createTime).toISOString()}`);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const batchSize = await processRecords(records);
|
// 检查是否超时
|
||||||
processedRecords += records.length; // 总是增加处理的记录数,即使有些记录已存在
|
if (checkTimeout()) break;
|
||||||
totalBatchRecords += batchSize; // 只增加实际插入的记录数
|
|
||||||
|
|
||||||
logWithTimestamp(`第 ${page+1} 批次处理完成。已处理 ${processedRecords}/${recordsToProcess} 条记录,实际插入 ${totalBatchRecords} 条 (${Math.round(processedRecords/recordsToProcess*100)}%)`);
|
|
||||||
|
|
||||||
// 更新查询条件,以便下一批次查询
|
|
||||||
query.createTime = { $gt: lastCreateTime };
|
|
||||||
if (lastId) {
|
|
||||||
query._id = { $gt: new ObjectId(lastId) };
|
|
||||||
}
|
|
||||||
logWithTimestamp(`更新查询条件: 创建时间 > ${new Date(lastCreateTime).toISOString()}, ID > ${lastId || 'none'}`);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// 更新同步状态
|
// 返回同步结果
|
||||||
const newSyncState: SyncState = {
|
|
||||||
last_sync_time: lastCreateTime,
|
|
||||||
records_synced: syncState.records_synced + totalBatchRecords,
|
|
||||||
last_sync_id: lastId
|
|
||||||
};
|
|
||||||
|
|
||||||
await setVariable("f/shorturl_analytics/clickhouse/shorturl_sync_state", JSON.stringify(newSyncState));
|
|
||||||
console.log(`同步状态已更新: 最后同步时间 ${new Date(newSyncState.last_sync_time).toISOString()}, 总同步记录数 ${newSyncState.records_synced}`);
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
success: true,
|
success: true,
|
||||||
records_processed: processedRecords,
|
records_synced: totalSyncedRecords,
|
||||||
records_synced: totalBatchRecords,
|
total_synced: syncState.records_synced + totalSyncedRecords,
|
||||||
total_synced: newSyncState.records_synced,
|
message: "同步完成"
|
||||||
last_sync_time: new Date(newSyncState.last_sync_time).toISOString(),
|
|
||||||
message: "数据同步完成"
|
|
||||||
};
|
};
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
console.error("同步过程中发生错误:", err);
|
const error = err as Error;
|
||||||
|
logWithTimestamp(`同步任务出错: ${error.message}`);
|
||||||
return {
|
return {
|
||||||
success: false,
|
success: false,
|
||||||
error: err instanceof Error ? err.message : String(err),
|
records_synced: 0,
|
||||||
stack: err instanceof Error ? err.stack : undefined
|
total_synced: syncState.records_synced,
|
||||||
|
message: error.message
|
||||||
};
|
};
|
||||||
} finally {
|
} finally {
|
||||||
// 关闭MongoDB连接
|
|
||||||
await client.close();
|
await client.close();
|
||||||
console.log("MongoDB连接已关闭");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// 如果直接执行此脚本
|
|
||||||
if (require.main === module) {
|
|
||||||
// 解析命令行参数
|
|
||||||
const args = process.argv.slice(2);
|
|
||||||
const params: Record<string, any> = {
|
|
||||||
batch_size: 1000,
|
|
||||||
initial_sync: false,
|
|
||||||
max_records: 9999999,
|
|
||||||
timeout_minutes: 60,
|
|
||||||
skip_clickhouse_check: false,
|
|
||||||
force_insert: false
|
|
||||||
};
|
|
||||||
|
|
||||||
// 简单的参数解析
|
|
||||||
for (let i = 0; i < args.length; i += 2) {
|
|
||||||
if (args[i].startsWith('--') && i + 1 < args.length) {
|
|
||||||
const key = args[i].substring(2);
|
|
||||||
let value: any = args[i + 1];
|
|
||||||
|
|
||||||
// 类型转换
|
|
||||||
if (value === 'true') value = true;
|
|
||||||
else if (value === 'false') value = false;
|
|
||||||
else if (!isNaN(Number(value))) value = Number(value);
|
|
||||||
|
|
||||||
params[key] = value;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
console.log('启动同步任务,参数:', params);
|
|
||||||
main(
|
|
||||||
params.batch_size,
|
|
||||||
params.initial_sync,
|
|
||||||
params.max_records,
|
|
||||||
params.timeout_minutes,
|
|
||||||
params.skip_clickhouse_check,
|
|
||||||
params.force_insert
|
|
||||||
).then(result => {
|
|
||||||
console.log('同步任务完成:', result);
|
|
||||||
process.exit(result.success ? 0 : 1);
|
|
||||||
}).catch(err => {
|
|
||||||
console.error('同步任务失败:', err);
|
|
||||||
process.exit(1);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
Reference in New Issue
Block a user