funnel data

This commit is contained in:
2025-03-11 00:36:22 +08:00
parent 7857a9007a
commit bc42ff4dbf
13 changed files with 2171 additions and 11 deletions

View File

@@ -0,0 +1,309 @@
require('dotenv').config();
const { createClient } = require('@clickhouse/client');
const { v4: uuidv4 } = require('uuid');
const http = require('http');
// Shared ClickHouse client. Every connection setting is read from a
// CLICKHOUSE_* environment variable and falls back to a local default.
const client = createClient({
  database: process.env.CLICKHOUSE_DATABASE || 'promote',
  username: process.env.CLICKHOUSE_USER || 'default',
  password: process.env.CLICKHOUSE_PASSWORD || '',
  host:
    'http://' +
    (process.env.CLICKHOUSE_HOST || 'localhost') +
    ':' +
    (process.env.CLICKHOUSE_PORT || 8123),
});
/**
 * Returns the current moment shifted back by a random whole number of days.
 *
 * Only the calendar day is randomised; the time-of-day stays that of "now".
 * The value is rendered from the UTC ISO string.
 *
 * @param {number} [daysBack=30] - Exclusive upper bound for the day offset.
 * @returns {string} Timestamp formatted as `YYYY-MM-DD HH:MM:SS`.
 */
function randomDate(daysBack = 30) {
  const moment = new Date();
  const offsetDays = Math.floor(Math.random() * daysBack);
  moment.setDate(moment.getDate() - offsetDays);

  // "2025-03-11T00:36:22.000Z" -> ["2025-03-11", "00:36:22.000Z"]
  const [dayPart, clockPart] = moment.toISOString().split('T');
  return `${dayPart} ${clockPart.slice(0, 8)}`;
}
/**
 * Picks a random integer from the closed range [min, max].
 *
 * @param {number} min - Smallest value that can be returned.
 * @param {number} max - Largest value that can be returned.
 * @returns {number} A random integer between min and max, both inclusive.
 */
function randomNumber(min, max) {
  const span = max - min + 1;
  const offset = Math.floor(Math.random() * span);
  return offset + min;
}
/**
 * Builds a random dotted-quad IPv4 address string.
 *
 * The first octet is drawn from 1-255 and the remaining three from 0-255.
 *
 * @returns {string} An address such as `"203.0.113.7"`.
 */
function randomIP() {
  const octets = [randomNumber(1, 255)];
  while (octets.length < 4) {
    octets.push(randomNumber(0, 255));
  }
  return octets.join('.');
}
/**
 * Returns one of a fixed set of desktop and mobile browser User-Agent strings,
 * chosen at random.
 *
 * @returns {string} A User-Agent header value.
 */
function randomUserAgent() {
  const userAgents = [
    // Chrome on Windows
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
    // Safari on macOS
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15',
    // Firefox on Windows
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0',
    // Safari on iPhone
    'Mozilla/5.0 (iPhone; CPU iPhone OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Mobile/15E148 Safari/604.1',
    // Safari on iPad
    'Mozilla/5.0 (iPad; CPU OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Mobile/15E148 Safari/604.1',
  ];
  const lastIndex = userAgents.length - 1;
  const pick = randomNumber(0, lastIndex);
  return userAgents[pick];
}
/**
 * Sends a raw SQL statement to ClickHouse through a plain HTTP POST.
 *
 * Host, port, credentials and database are read from the CLICKHOUSE_*
 * environment variables, with local defaults. Credentials are sent as HTTP
 * Basic auth and the statement is sent as the request body.
 *
 * @param {string} query - SQL text to execute.
 * @returns {Promise<string>} Resolves with the response body for 2xx responses.
 * @throws {Error} Rejects with `HTTP Error: <status> - <body>` for non-2xx
 *   responses, or with the underlying error on request/response failure.
 */
function sendClickHouseQuery(query) {
  return new Promise((resolve, reject) => {
    // Basic-auth credentials.
    const username = process.env.CLICKHOUSE_USER || 'default';
    const password = process.env.CLICKHOUSE_PASSWORD || '';
    const auth = Buffer.from(`${username}:${password}`).toString('base64');
    const database = process.env.CLICKHOUSE_DATABASE || 'promote';
    const options = {
      hostname: process.env.CLICKHOUSE_HOST || 'localhost',
      port: process.env.CLICKHOUSE_PORT || 8123,
      // Percent-encode the name so unusual characters cannot break the URL.
      path: `/?database=${encodeURIComponent(database)}`,
      method: 'POST',
      headers: {
        'Content-Type': 'text/plain',
        'Authorization': `Basic ${auth}`,
      },
    };
    const req = http.request(options, (res) => {
      // Decode as a stream: without this each Buffer chunk is stringified on
      // its own, corrupting multi-byte characters split across chunks.
      res.setEncoding('utf8');
      let data = '';
      res.on('data', (chunk) => {
        data += chunk;
      });
      // A failure while reading the response must settle the promise too.
      res.on('error', reject);
      res.on('end', () => {
        if (res.statusCode >= 200 && res.statusCode < 300) {
          resolve(data);
        } else {
          reject(new Error(`HTTP Error: ${res.statusCode} - ${data}`));
        }
      });
    });
    req.on('error', reject);
    req.write(query);
    req.end();
  });
}
/**
 * Probes the ClickHouse server by running `SELECT 1`.
 *
 * The outcome is logged; failures are reported through the return value and
 * never thrown.
 *
 * @returns {Promise<boolean>} true when the probe query succeeded, otherwise false.
 */
async function checkClickHouseConnection() {
  console.log('检查ClickHouse连接...');
  let reachable = false;
  try {
    // Only success or failure matters; the response body is ignored.
    await sendClickHouseQuery('SELECT 1');
    console.log('ClickHouse连接成功');
    reachable = true;
  } catch (probeError) {
    console.error('ClickHouse连接失败:', probeError.message);
  }
  return reachable;
}
/**
 * Ensures the three event tables (view_events, like_events, follower_events)
 * exist by issuing `CREATE TABLE IF NOT EXISTS` for each, one after another.
 *
 * Every table uses the MergeTree engine and is partitioned by the month of
 * its `timestamp` column. Failures are logged and reported through the return
 * value; tables after the failing one are not attempted.
 *
 * @returns {Promise<boolean>} true when all statements succeeded, otherwise false.
 */
async function checkAndCreateTables() {
  console.log('检查ClickHouse表是否存在...');

  // Renders one CREATE TABLE statement, one clause per line, wrapped in a
  // leading and trailing newline.
  const renderDdl = ({ table, columns, sortKey }) =>
    [
      '',
      `CREATE TABLE IF NOT EXISTS ${table} (`,
      columns.join(',\n'),
      ') ENGINE = MergeTree()',
      'PARTITION BY toYYYYMM(timestamp)',
      `ORDER BY (${sortKey})`,
      '',
    ].join('\n');

  const tableSpecs = [
    {
      table: 'view_events',
      columns: [
        'user_id String',
        'content_id String',
        'timestamp DateTime DEFAULT now()',
        'ip String',
        'user_agent String',
      ],
      sortKey: 'user_id, content_id, timestamp',
    },
    {
      table: 'like_events',
      columns: [
        'user_id String',
        'content_id String',
        'timestamp DateTime DEFAULT now()',
        'action UInt8',
      ],
      sortKey: 'user_id, content_id, timestamp',
    },
    {
      table: 'follower_events',
      columns: [
        'follower_id String',
        'followed_id String',
        'timestamp DateTime DEFAULT now()',
        'action UInt8',
      ],
      sortKey: 'follower_id, followed_id, timestamp',
    },
  ];

  try {
    for (const spec of tableSpecs) {
      await sendClickHouseQuery(renderDdl(spec));
    }
    console.log('表检查完成');
    return true;
  } catch (error) {
    console.error('检查或创建表失败:', error);
    return false;
  }
}
/**
 * Seeds the view_events table with randomly generated rows.
 *
 * Rows are sent as multi-row INSERT statements of at most 10 rows each, one
 * statement at a time. Failures are logged and reported through the return
 * value; remaining batches are skipped after the first failure.
 *
 * @param {number} [count=100] - Total number of rows to insert.
 * @returns {Promise<boolean>} true when every batch succeeded, otherwise false.
 */
async function insertViewEvents(count = 100) {
  console.log(`开始插入${count}个浏览事件...`);
  const rowsPerBatch = 10;
  const insertPrefix =
    'INSERT INTO view_events (user_id, content_id, timestamp, ip, user_agent) VALUES ';
  const quote = (value) => `'${value}'`;

  try {
    for (let offset = 0; offset < count; offset += rowsPerBatch) {
      const upperBound = Math.min(offset + rowsPerBatch, count);
      const tuples = [];
      for (let row = offset; row < upperBound; row += 1) {
        const userId = `user_${randomNumber(1, 100)}`;
        const contentId = `content_${randomNumber(1, 50)}`;
        const timestamp = randomDate(30);
        const ip = randomIP();
        // Backslash-escape single quotes so the value stays inside its SQL literal.
        const userAgent = randomUserAgent().split("'").join("\\'");
        const fields = [userId, contentId, timestamp, ip, userAgent];
        tuples.push(`(${fields.map(quote).join(', ')})`);
      }
      await sendClickHouseQuery(insertPrefix + tuples.join(', '));
      console.log(`已插入 ${upperBound} 个浏览事件...`);
    }
    console.log(`成功插入${count}个浏览事件`);
    return true;
  } catch (error) {
    console.error('插入浏览事件失败:', error);
    return false;
  }
}
/**
 * Seeds the like_events table with randomly generated rows.
 *
 * Rows are sent as multi-row INSERT statements of at most 10 rows each, one
 * statement at a time. Failures are logged and reported through the return
 * value; remaining batches are skipped after the first failure.
 *
 * @param {number} [count=50] - Total number of rows to insert.
 * @returns {Promise<boolean>} true when every batch succeeded, otherwise false.
 */
async function insertLikeEvents(count = 50) {
  console.log(`开始插入${count}个点赞事件...`);
  const rowsPerBatch = 10;
  const insertPrefix =
    'INSERT INTO like_events (user_id, content_id, timestamp, action) VALUES ';

  try {
    for (let offset = 0; offset < count; offset += rowsPerBatch) {
      const upperBound = Math.min(offset + rowsPerBatch, count);
      const tuples = [];
      for (let row = offset; row < upperBound; row += 1) {
        const userId = `user_${randomNumber(1, 100)}`;
        const contentId = `content_${randomNumber(1, 50)}`;
        const timestamp = randomDate(30);
        // Roughly 80% likes (1) and 20% un-likes (2).
        let action = 2;
        if (randomNumber(1, 10) <= 8) {
          action = 1;
        }
        tuples.push(`('${userId}', '${contentId}', '${timestamp}', ${action})`);
      }
      await sendClickHouseQuery(insertPrefix + tuples.join(', '));
      console.log(`已插入 ${upperBound} 个点赞事件...`);
    }
    console.log(`成功插入${count}个点赞事件`);
    return true;
  } catch (error) {
    console.error('插入点赞事件失败:', error);
    return false;
  }
}
/**
 * Seeds the follower_events table with randomly generated rows.
 *
 * Rows are sent as multi-row INSERT statements of at most 10 rows each, one
 * statement at a time. Failures are logged and reported through the return
 * value; remaining batches are skipped after the first failure.
 *
 * @param {number} [count=30] - Total number of rows to insert.
 * @returns {Promise<boolean>} true when every batch succeeded, otherwise false.
 */
async function insertFollowerEvents(count = 30) {
  console.log(`开始插入${count}个关注事件...`);
  const rowsPerBatch = 10;
  const insertPrefix =
    'INSERT INTO follower_events (follower_id, followed_id, timestamp, action) VALUES ';

  try {
    for (let offset = 0; offset < count; offset += rowsPerBatch) {
      const upperBound = Math.min(offset + rowsPerBatch, count);
      const tuples = [];
      for (let row = offset; row < upperBound; row += 1) {
        const followerId = `user_${randomNumber(1, 100)}`;
        const followedId = `influencer_${randomNumber(1, 20)}`;
        const timestamp = randomDate(30);
        // Roughly 80% follows (1) and 20% unfollows (2).
        let action = 2;
        if (randomNumber(1, 10) <= 8) {
          action = 1;
        }
        tuples.push(`('${followerId}', '${followedId}', '${timestamp}', ${action})`);
      }
      await sendClickHouseQuery(insertPrefix + tuples.join(', '));
      console.log(`已插入 ${upperBound} 个关注事件...`);
    }
    console.log(`成功插入${count}个关注事件`);
    return true;
  } catch (error) {
    console.error('插入关注事件失败:', error);
    return false;
  }
}
/**
 * Script entry point: checks connectivity, ensures the event tables exist,
 * then seeds view, like and follower events.
 *
 * The helper functions report failure through a boolean return value rather
 * than by throwing, so each result is checked here. Seeding is skipped when
 * the server is unreachable or the tables could not be created, and the
 * final success message is only printed when every insert step succeeded.
 * The ClickHouse client is always closed before returning.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log('开始插入ClickHouse测试数据...');
  try {
    // Connectivity check.
    const connectionOk = await checkClickHouseConnection();
    if (!connectionOk) {
      console.error('无法连接到ClickHouse服务器请检查配置和服务器状态');
      return;
    }

    // Without the tables every INSERT below would fail, so stop here.
    const tablesOk = await checkAndCreateTables();
    if (!tablesOk) {
      console.error('创建ClickHouse表失败,已终止测试数据插入');
      return;
    }

    // The inserts are independent, so one failing does not block the others;
    // the outcome of each is still tracked for the final report.
    const viewsOk = await insertViewEvents(100);
    const likesOk = await insertLikeEvents(50);
    const followsOk = await insertFollowerEvents(30);

    if (viewsOk && likesOk && followsOk) {
      console.log('所有ClickHouse测试数据插入完成');
    } else {
      console.error('部分ClickHouse测试数据插入失败,请查看上面的错误日志');
    }
  } catch (error) {
    console.error('插入ClickHouse测试数据过程中发生错误:', error);
  } finally {
    // Close the client connection (also runs after the early returns above).
    await client.close();
  }
}
// Run the script. main() catches its own errors, but the cleanup in its
// `finally` block can still reject, so handle that here rather than leaving
// the promise floating, and report it through the exit code.
main().catch((error) => {
  console.error('ClickHouse测试数据脚本异常终止:', error);
  process.exitCode = 1;
});