Files
promote/backend/scripts/insert-clickhouse-test-data.js
2025-03-11 00:36:22 +08:00

309 lines
9.6 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
require('dotenv').config();
const { createClient } = require('@clickhouse/client');
const { v4: uuidv4 } = require('uuid');
const http = require('http');
/**
 * Assemble the options object passed to `createClient`.
 *
 * Each setting is read from the given environment map; an unset or empty
 * value falls back to a local default (localhost:8123, user `default`,
 * empty password, database `promote`).
 *
 * @param {Object<string, string|undefined>} env - Environment variables to read.
 * @returns {{host: string, username: string, password: string, database: string}}
 */
function buildClientOptions(env) {
  const hostName = env.CLICKHOUSE_HOST || 'localhost';
  const httpPort = env.CLICKHOUSE_PORT || 8123;
  return {
    host: `http://${hostName}:${httpPort}`,
    username: env.CLICKHOUSE_USER || 'default',
    password: env.CLICKHOUSE_PASSWORD || '',
    database: env.CLICKHOUSE_DATABASE || 'promote',
  };
}

// ClickHouse client created once at module load from the process environment.
const client = createClient(buildClientOptions(process.env));
/**
 * Produce a timestamp string for the current time of day on a random day
 * within the last `daysBack` days (today included).
 *
 * The value is taken from `Date#toISOString`, so it is expressed in UTC.
 *
 * @param {number} [daysBack=30] - Size of the look-back window in days.
 * @returns {string} Timestamp formatted as `YYYY-MM-DD HH:MM:SS`.
 */
function randomDate(daysBack = 30) {
  const moment = new Date();
  const daysAgo = Math.floor(Math.random() * daysBack);
  moment.setDate(moment.getDate() - daysAgo);

  // "2025-03-11T00:36:22.000Z" -> "2025-03-11 00:36:22"
  const isoSeconds = moment.toISOString().slice(0, 19);
  return isoSeconds.replace('T', ' ');
}
/**
 * Pick a random integer between `min` and `max`, both ends included.
 *
 * @param {number} min - Smallest value that may be returned.
 * @param {number} max - Largest value that may be returned.
 * @returns {number} An integer in the range [min, max].
 */
function randomNumber(min, max) {
  const span = max - min + 1;
  const offset = Math.floor(Math.random() * span);
  return offset + min;
}
/**
 * Build a random dotted-quad IPv4 address string.
 *
 * The first octet is drawn from 1-255 and the remaining three from 0-255.
 *
 * @returns {string} Address such as `12.0.200.7`.
 */
function randomIP() {
  const octets = [randomNumber(1, 255)];
  while (octets.length < 4) {
    octets.push(randomNumber(0, 255));
  }
  return octets.join('.');
}
/**
 * Return one of a fixed set of browser user-agent strings, chosen at random.
 *
 * @returns {string} A desktop or mobile user-agent string.
 */
function randomUserAgent() {
  const knownAgents = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0',
    'Mozilla/5.0 (iPhone; CPU iPhone OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Mobile/15E148 Safari/604.1',
    'Mozilla/5.0 (iPad; CPU OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Mobile/15E148 Safari/604.1',
  ];
  const lastIndex = knownAgents.length - 1;
  const chosen = randomNumber(0, lastIndex);
  return knownAgents[chosen];
}
/**
 * Send a raw SQL statement to ClickHouse with a plain HTTP POST.
 *
 * Host, port, credentials and database are read from the CLICKHOUSE_*
 * environment variables on every call, with local defaults when unset.
 *
 * @param {string} query - SQL text sent as the request body.
 * @returns {Promise<string>} Resolves with the response body on a 2xx status.
 *   Rejects with `Error('HTTP Error: <status> - <body>')` on any other status,
 *   or with the underlying error when the request or response stream fails.
 */
function sendClickHouseQuery(query) {
  return new Promise((resolve, reject) => {
    const username = process.env.CLICKHOUSE_USER || 'default';
    const password = process.env.CLICKHOUSE_PASSWORD || '';
    const database = process.env.CLICKHOUSE_DATABASE || 'promote';

    // HTTP Basic authentication header value.
    const auth = Buffer.from(`${username}:${password}`).toString('base64');

    const options = {
      hostname: process.env.CLICKHOUSE_HOST || 'localhost',
      port: process.env.CLICKHOUSE_PORT || 8123,
      // Percent-encode the database name: http.request rejects paths with
      // unescaped characters, and '&' or '#' would otherwise change the URL.
      path: `/?database=${encodeURIComponent(database)}`,
      method: 'POST',
      headers: {
        'Content-Type': 'text/plain',
        'Authorization': `Basic ${auth}`,
      },
    };

    const req = http.request(options, (res) => {
      let data = '';
      // Decode as one UTF-8 stream so a multi-byte character that straddles
      // two network chunks is not corrupted by per-chunk string conversion.
      res.setEncoding('utf8');
      res.on('data', (chunk) => {
        data += chunk;
      });
      res.on('error', reject);
      res.on('end', () => {
        if (res.statusCode >= 200 && res.statusCode < 300) {
          resolve(data);
        } else {
          reject(new Error(`HTTP Error: ${res.statusCode} - ${data}`));
        }
      });
    });

    req.on('error', reject);
    req.write(query);
    req.end();
  });
}
/**
 * Probe the ClickHouse server by running `SELECT 1`.
 *
 * Never throws: a failed probe is logged and reported through the return value.
 *
 * @returns {Promise<boolean>} `true` when the probe query succeeds, otherwise `false`.
 */
async function checkClickHouseConnection() {
  console.log('检查ClickHouse连接...');

  let reachable = false;
  try {
    await sendClickHouseQuery('SELECT 1');
    console.log('ClickHouse连接成功');
    reachable = true;
  } catch (probeError) {
    console.error('ClickHouse连接失败:', probeError.message);
  }
  return reachable;
}
/**
 * Ensure the three event tables exist, creating any that are missing.
 *
 * Issues one `CREATE TABLE IF NOT EXISTS` statement per table, in order:
 * `view_events`, `like_events`, `follower_events`. All use the MergeTree
 * engine, are partitioned by month of `timestamp`, and are ordered by their
 * two id columns followed by `timestamp`. Stops at the first statement that
 * fails.
 *
 * @returns {Promise<boolean>} `true` when every statement succeeded, `false`
 *   (after logging the error) otherwise.
 */
async function checkAndCreateTables() {
  console.log('检查ClickHouse表是否存在...');

  const eventTables = [
    {
      name: 'view_events',
      columns: [
        'user_id String',
        'content_id String',
        'timestamp DateTime DEFAULT now()',
        'ip String',
        'user_agent String',
      ],
      orderBy: 'user_id, content_id, timestamp',
    },
    {
      name: 'like_events',
      columns: [
        'user_id String',
        'content_id String',
        'timestamp DateTime DEFAULT now()',
        'action UInt8',
      ],
      orderBy: 'user_id, content_id, timestamp',
    },
    {
      name: 'follower_events',
      columns: [
        'follower_id String',
        'followed_id String',
        'timestamp DateTime DEFAULT now()',
        'action UInt8',
      ],
      orderBy: 'follower_id, followed_id, timestamp',
    },
  ];

  // Render one table definition as a DDL statement, one clause per line,
  // with a leading and trailing newline.
  const toCreateStatement = ({ name, columns, orderBy }) =>
    [
      '',
      `CREATE TABLE IF NOT EXISTS ${name} (`,
      columns.join(',\n'),
      ') ENGINE = MergeTree()',
      'PARTITION BY toYYYYMM(timestamp)',
      `ORDER BY (${orderBy})`,
      '',
    ].join('\n');

  try {
    for (const table of eventTables) {
      await sendClickHouseQuery(toCreateStatement(table));
    }
    console.log('表检查完成');
    return true;
  } catch (ddlError) {
    console.error('检查或创建表失败:', ddlError);
    return false;
  }
}
/**
 * Insert randomly generated rows into `view_events`.
 *
 * Rows are sent in batches of at most 10 per INSERT statement, one batch at a
 * time. Each row gets a random user id (`user_1`..`user_100`), content id
 * (`content_1`..`content_50`), a timestamp from the last 30 days, an IP
 * address and a user-agent string.
 *
 * @param {number} [count=100] - Total number of rows to insert.
 * @returns {Promise<boolean>} `true` when all batches were sent, `false`
 *   (after logging the error) as soon as one batch fails.
 */
async function insertViewEvents(count = 100) {
  console.log(`开始插入${count}个浏览事件...`);

  const rowsPerBatch = 10;
  const insertPrefix = 'INSERT INTO view_events (user_id, content_id, timestamp, ip, user_agent) VALUES ';

  // Render one VALUES tuple from freshly generated random data.
  const buildRow = () => {
    const user = `user_${randomNumber(1, 100)}`;
    const content = `content_${randomNumber(1, 50)}`;
    const viewedAt = randomDate(30);
    const address = randomIP();
    // Backslash-escape single quotes so the value is safe inside a SQL literal.
    const agent = randomUserAgent().replace(/'/g, "\\'");
    return `('${user}', '${content}', '${viewedAt}', '${address}', '${agent}')`;
  };

  try {
    for (let offset = 0; offset < count; offset += rowsPerBatch) {
      const upTo = Math.min(offset + rowsPerBatch, count);
      const rows = [];
      while (rows.length < upTo - offset) {
        rows.push(buildRow());
      }
      await sendClickHouseQuery(insertPrefix + rows.join(', '));
      console.log(`已插入 ${upTo} 个浏览事件...`);
    }
    console.log(`成功插入${count}个浏览事件`);
    return true;
  } catch (insertError) {
    console.error('插入浏览事件失败:', insertError);
    return false;
  }
}
/**
 * Insert randomly generated rows into `like_events`.
 *
 * Rows are sent in batches of at most 10 per INSERT statement, one batch at a
 * time. Each row gets a random user id (`user_1`..`user_100`), content id
 * (`content_1`..`content_50`), a timestamp from the last 30 days and an
 * action code.
 *
 * @param {number} [count=50] - Total number of rows to insert.
 * @returns {Promise<boolean>} `true` when all batches were sent, `false`
 *   (after logging the error) as soon as one batch fails.
 */
async function insertLikeEvents(count = 50) {
  console.log(`开始插入${count}个点赞事件...`);

  const rowsPerBatch = 10;
  const insertPrefix = 'INSERT INTO like_events (user_id, content_id, timestamp, action) VALUES ';

  // Render one VALUES tuple from freshly generated random data.
  const buildRow = () => {
    const user = `user_${randomNumber(1, 100)}`;
    const content = `content_${randomNumber(1, 50)}`;
    const likedAt = randomDate(30);
    // Action 1 = like, 2 = unlike; a roll of 1-8 out of 10 gives a like (80%).
    let action = 2;
    if (randomNumber(1, 10) <= 8) {
      action = 1;
    }
    return `('${user}', '${content}', '${likedAt}', ${action})`;
  };

  try {
    for (let offset = 0; offset < count; offset += rowsPerBatch) {
      const upTo = Math.min(offset + rowsPerBatch, count);
      const rows = [];
      while (rows.length < upTo - offset) {
        rows.push(buildRow());
      }
      await sendClickHouseQuery(insertPrefix + rows.join(', '));
      console.log(`已插入 ${upTo} 个点赞事件...`);
    }
    console.log(`成功插入${count}个点赞事件`);
    return true;
  } catch (insertError) {
    console.error('插入点赞事件失败:', insertError);
    return false;
  }
}
/**
 * Insert randomly generated rows into `follower_events`.
 *
 * Rows are sent in batches of at most 10 per INSERT statement, one batch at a
 * time. Each row gets a random follower id (`user_1`..`user_100`), followed id
 * (`influencer_1`..`influencer_20`), a timestamp from the last 30 days and an
 * action code.
 *
 * @param {number} [count=30] - Total number of rows to insert.
 * @returns {Promise<boolean>} `true` when all batches were sent, `false`
 *   (after logging the error) as soon as one batch fails.
 */
async function insertFollowerEvents(count = 30) {
  console.log(`开始插入${count}个关注事件...`);

  const rowsPerBatch = 10;
  const insertPrefix = 'INSERT INTO follower_events (follower_id, followed_id, timestamp, action) VALUES ';

  // Render one VALUES tuple from freshly generated random data.
  const buildRow = () => {
    const follower = `user_${randomNumber(1, 100)}`;
    const followed = `influencer_${randomNumber(1, 20)}`;
    const followedAt = randomDate(30);
    // Action 1 = follow, 2 = unfollow; a roll of 1-8 out of 10 gives a follow (80%).
    let action = 2;
    if (randomNumber(1, 10) <= 8) {
      action = 1;
    }
    return `('${follower}', '${followed}', '${followedAt}', ${action})`;
  };

  try {
    for (let offset = 0; offset < count; offset += rowsPerBatch) {
      const upTo = Math.min(offset + rowsPerBatch, count);
      const rows = [];
      while (rows.length < upTo - offset) {
        rows.push(buildRow());
      }
      await sendClickHouseQuery(insertPrefix + rows.join(', '));
      console.log(`已插入 ${upTo} 个关注事件...`);
    }
    console.log(`成功插入${count}个关注事件`);
    return true;
  } catch (insertError) {
    console.error('插入关注事件失败:', insertError);
    return false;
  }
}
/**
 * Script entry point: verify connectivity, ensure the event tables exist,
 * then insert 100 view, 50 like and 30 follower test events.
 *
 * The step helpers report failure through a `false` return value instead of
 * throwing, so each result is checked here. Any failed step sets
 * `process.exitCode` to 1 and suppresses the final success message. All three
 * insert steps are attempted even if an earlier one failed; a failed
 * connection or table check aborts before inserting. The ClickHouse client is
 * always closed at the end.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log('开始插入ClickHouse测试数据...');
  try {
    // Check the ClickHouse connection.
    const connectionOk = await checkClickHouseConnection();
    if (!connectionOk) {
      console.error('无法连接到ClickHouse服务器请检查配置和服务器状态');
      process.exitCode = 1;
      return;
    }

    // Check for the tables and create them if needed; without them the
    // inserts below cannot succeed, so stop here on failure.
    const tablesOk = await checkAndCreateTables();
    if (!tablesOk) {
      console.error('ClickHouse表检查或创建失败, 已中止测试数据插入');
      process.exitCode = 1;
      return;
    }

    // Insert the view, like and follower test events in turn.
    const viewsOk = await insertViewEvents(100);
    const likesOk = await insertLikeEvents(50);
    const followsOk = await insertFollowerEvents(30);

    if (viewsOk && likesOk && followsOk) {
      console.log('所有ClickHouse测试数据插入完成');
    } else {
      console.error('部分ClickHouse测试数据插入失败, 请查看上面的错误日志');
      process.exitCode = 1;
    }
  } catch (error) {
    console.error('插入ClickHouse测试数据过程中发生错误:', error);
    process.exitCode = 1;
  } finally {
    // Close the client connection; a failure here must not escape as an
    // unhandled rejection.
    try {
      await client.close();
    } catch (closeError) {
      console.error('关闭ClickHouse客户端失败:', closeError);
      process.exitCode = 1;
    }
  }
}
// Run the script. Guard against any rejection that escapes main() so the
// failure is logged and reflected in the process exit code rather than
// surfacing as an unhandled promise rejection.
main().catch((error) => {
  console.error('插入ClickHouse测试数据过程中发生错误:', error);
  process.exitCode = 1;
});