Files
promote/backend/scripts/generate-fake-data.ts
2025-03-10 20:27:12 +08:00

483 lines
17 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import axios from 'axios';
import dotenv from 'dotenv';
import { v4 as uuidv4 } from 'uuid';
import { createClient } from '@clickhouse/client';
// 加载环境变量
dotenv.config();
// ---- Runtime configuration ----

// Base URL of the backend API; the port is taken from PORT and defaults to 4000.
const API_URL = 'http://localhost:' + (process.env.PORT || 4000) + '/api';

// Bearer token sent with API requests; empty until a login result is stored here.
let AUTH_TOKEN = '';

// ClickHouse connection settings, each overridable through the environment.
const CLICKHOUSE_HOST = process.env.CLICKHOUSE_HOST || 'localhost';
const CLICKHOUSE_PORT = process.env.CLICKHOUSE_PORT || '8123';
const CLICKHOUSE_USER = process.env.CLICKHOUSE_USER || 'admin';
const CLICKHOUSE_PASSWORD = process.env.CLICKHOUSE_PASSWORD || 'your_secure_password';
const CLICKHOUSE_DATABASE = process.env.CLICKHOUSE_DATABASE || 'promote';

// Time window for generated data: from one year ago up to "now".
const END_DATE = new Date();
const START_DATE = new Date();
START_DATE.setFullYear(END_DATE.getFullYear() - 1);

// Account used to log in to the API.
const TEST_ACCOUNT = {
  email: 'vitalitymailg@gmail.com',
  password: 'password123',
};
// Fixture identifiers referenced by the generated data — adjust as needed.
const TEST_DATA = {
  // Five influencer UUIDs that differ only in their last digit (1-5).
  influencerIds: ['1', '2', '3', '4', '5'].map(
    (lastDigit) => `20000000-0000-0000-0000-00000000000${lastDigit}`,
  ),
  // Two project UUIDs that differ only in their last digit (1-2).
  projectIds: ['1', '2'].map(
    (lastDigit) => `10000000-0000-0000-0000-00000000000${lastDigit}`,
  ),
  // Platforms a generated post can be attributed to.
  platformList: ['youtube', 'instagram', 'tiktok', 'twitter', 'facebook'],
};
// Title/description pairs used as the basis for generated posts.
// Each tuple is [title, description].
const POST_TEMPLATES: { title: string; description: string }[] = (
  [
    ['产品介绍视频', '展示产品功能和优势'],
    ['使用教程', '详细介绍如何使用产品的功能'],
    ['用户案例分享', '真实用户使用产品的体验分享'],
    ['行业趋势解析', '分析当前行业发展趋势和机遇'],
    ['产品评测', '全面评测产品的性能和体验'],
    ['问答视频', '解答用户常见问题'],
    ['活动宣传', '推广最新的营销活动和优惠'],
    ['幕后花絮', '分享产品研发和团队的故事'],
    ['用户反馈总结', '汇总用户反馈并分享改进计划'],
    ['行业对比分析', '与竞品的对比分析'],
  ] as const
).map(([title, description]) => ({ title, description }));
// Comment templates: one entry is picked at random as the text of each generated comment.
const COMMENT_TEMPLATES = [
'这个产品太棒了!我已经使用了一个月,效果非常好。',
'请问这个产品适合初学者使用吗?',
'价格有点高,但质量确实不错。',
'我有一个问题,能否详细说明一下第三个功能怎么用?',
'已经购买并且推荐给朋友了,大家都很满意。',
'比我之前用的产品好用多了,界面设计也很友好。',
'发货速度快,客服态度好,产品质量一流!',
'还在考虑要不要买,有没有更多实际使用的案例?',
'刚收到货,包装很精美,期待使用效果。',
'产品有保修吗?保修期是多久?'
];
// HTTP client for the backend API. validateStatus accepts every status code,
// so HTTP error responses resolve normally and callers check response.status.
const apiClient = axios.create({
  baseURL: API_URL,
  timeout: 10000,
  validateStatus: () => true,
});

// ClickHouse client used for inserting analytics events directly.
const clickhouse = createClient({
  url: 'http://' + CLICKHOUSE_HOST + ':' + CLICKHOUSE_PORT,
  username: CLICKHOUSE_USER,
  password: CLICKHOUSE_PASSWORD,
  database: CLICKHOUSE_DATABASE,
});

// Attach the bearer token to outgoing API requests once one is available.
// AUTH_TOKEN is read at request time, so a token stored later is picked up.
apiClient.interceptors.request.use((requestConfig) => {
  if (!AUTH_TOKEN) {
    return requestConfig;
  }
  requestConfig.headers.Authorization = `Bearer ${AUTH_TOKEN}`;
  return requestConfig;
});
// Returns a random integer between min and max, both bounds inclusive.
function getRandomInt(min: number, max: number): number {
  const span = max - min + 1;
  const offset = Math.floor(Math.random() * span);
  return offset + min;
}
// Returns a random point in time between start and end.
function getRandomDate(start: Date, end: Date): Date {
  const startMs = start.getTime();
  const offsetMs = Math.random() * (end.getTime() - startMs);
  return new Date(startMs + offsetMs);
}
// Logs in with TEST_ACCOUNT via POST /auth/login and returns the bearer token.
//
// The token is looked up in these response locations, in order:
//   data.access_token, data.token, data.data.session.access_token.
// If none holds a non-empty string, the response body itself is used only when
// it is a non-empty string; any other shape counts as a failed login, because
// interpolating an object into the Authorization header would send
// "Bearer [object Object]".
//
// Returns the token, or '' when login fails for any reason (the failure is logged).
async function login(): Promise<string> {
  try {
    console.log('🔑 登录获取认证令牌...');
    const response = await apiClient.post('/auth/login', {
      email: TEST_ACCOUNT.email,
      password: TEST_ACCOUNT.password,
    });
    console.log(`登录响应状态码: ${response.status}`);
    if (response.status !== 200) {
      throw new Error(`登录失败: ${JSON.stringify(response.data)}`);
    }

    const body = response.data;
    const isUsableToken = (value: unknown): value is string =>
      typeof value === 'string' && value !== '';

    // Candidate locations, most common first.
    const candidates: unknown[] = [
      body?.access_token,
      body?.token,
      body?.data?.session?.access_token,
    ];
    let token = candidates.find(isUsableToken);

    if (token === undefined) {
      if (!isUsableToken(body)) {
        throw new Error('登录响应中未找到可用的令牌');
      }
      console.log('警告: 未找到标准令牌格式,使用整个响应作为令牌');
      token = body;
    }

    console.log('✅ 登录成功');
    return token;
  } catch (error) {
    // Log only the message: a raw axios error object embeds the request
    // config, which includes the login payload (email and password).
    console.error('❌ 登录失败:', error instanceof Error ? error.message : error);
    return '';
  }
}
// Creates `count` fake posts through POST /posts, one request at a time.
// Each post combines a random template, influencer, project and platform with
// a publish date between START_DATE and END_DATE. Returns the IDs that could
// be extracted from successful responses; failures are logged and skipped.
async function createFakePosts(count: number): Promise<string[]> {
  console.log(`\n📝 开始创建 ${count} 个假帖子...`);

  const pickRandom = <T>(items: T[]): T => items[Math.floor(Math.random() * items.length)];
  const pause = (ms: number) => new Promise<void>((resolve) => setTimeout(resolve, ms));
  const createdIds: string[] = [];

  for (let index = 0; index < count; index += 1) {
    const postNumber = index + 1;
    try {
      const template = pickRandom(POST_TEMPLATES);
      const influencerId = pickRandom(TEST_DATA.influencerIds);
      const projectId = pickRandom(TEST_DATA.projectIds);
      const platform = pickRandom(TEST_DATA.platformList);
      // Random publish date inside the configured one-year window.
      const publishedAt = getRandomDate(START_DATE, END_DATE);

      // Each platform gets a URL shaped like its real post links.
      let postUrl: string;
      switch (platform) {
        case 'youtube':
          postUrl = `https://youtube.com/watch?v=${uuidv4().substring(0, 8)}`;
          break;
        case 'instagram':
          postUrl = `https://instagram.com/p/${generateRandomString(10)}`;
          break;
        case 'tiktok':
          postUrl = `https://tiktok.com/@user/video/${Math.floor(Math.random() * 10000000000)}`;
          break;
        case 'twitter':
          postUrl = `https://twitter.com/user/status/${Math.floor(Math.random() * 10000000000)}`;
          break;
        case 'facebook':
          postUrl = `https://facebook.com/user/posts/${uuidv4().substring(0, 10)}`;
          break;
        default:
          postUrl = `https://example.com/post/${uuidv4()}`;
      }

      const response = await apiClient.post('/posts', {
        title: `${template.title} #${postNumber}`,
        description: `${template.description} - 自动生成的测试内容 ${postNumber}`,
        content: `这是一个自动生成的测试帖子内容。这是帖子 #${postNumber},包含有关产品的详细信息和营销内容。`,
        influencer_id: influencerId,
        project_id: projectId,
        platform,
        post_url: postUrl,
        status: 'published',
        published_at: publishedAt.toISOString(),
      });

      if ([200, 201].includes(response.status)) {
        // Dump the full response to help debug unexpected shapes.
        console.log(`帖子 #${postNumber} 响应:`, JSON.stringify(response.data, null, 2));

        // The ID may live in several places; the first truthy one wins.
        const body = response.data;
        const postId =
          body?.post?.post_id || body?.id || body?.post?.id || body?._id || body?.postId;

        if (postId) {
          createdIds.push(postId);
          console.log(`✅ 帖子 #${postNumber} 创建成功 (ID: ${postId})`);
        } else {
          console.warn(`⚠️ 帖子 #${postNumber} 创建成功但无法提取ID将无法为此帖子创建评论`);
        }
      } else {
        console.error(`❌ 帖子 #${postNumber} 创建失败:`, response.data);
      }
    } catch (error) {
      console.error(`❌ 创建帖子 #${postNumber} 时出错:`, error);
    }

    // Short random delay so the API is not hit too rapidly.
    await pause(getRandomInt(100, 300));
  }

  console.log(`📊 总计创建了 ${createdIds.length}/${count} 个帖子`);
  return createdIds;
}
// Builds a random string of the requested length from ASCII letters and digits.
function generateRandomString(length: number): string {
  const alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789';
  const picked: string[] = [];
  while (picked.length < length) {
    const position = Math.floor(Math.random() * alphabet.length);
    picked.push(alphabet.charAt(position));
  }
  return picked.join('');
}
// Creates `commentsPerPost` fake comments for every post ID through
// POST /posts/:id/comments, one request at a time. Each comment uses a random
// template, a fresh random user ID and a creation date between START_DATE and
// END_DATE. Failures are logged and do not stop the run.
async function createFakeComments(postIds: string[], commentsPerPost: number): Promise<void> {
  console.log(`\n💬 开始为 ${postIds.length} 个帖子创建评论 (每个帖子 ${commentsPerPost} 条)...`);

  const pause = (ms: number) => new Promise<void>((resolve) => setTimeout(resolve, ms));
  let createdTotal = 0;

  for (const postId of postIds) {
    for (let index = 0; index < commentsPerPost; index += 1) {
      const commentNumber = index + 1;
      try {
        const text = COMMENT_TEMPLATES[Math.floor(Math.random() * COMMENT_TEMPLATES.length)];
        const createdAt = getRandomDate(START_DATE, END_DATE);
        const payload = {
          content: `${text} #${commentNumber}`,
          user_id: uuidv4(), // random user ID for every comment
          created_at: createdAt.toISOString(),
        };

        // Print the request payload for debugging.
        console.log(`尝试为帖子 ${postId} 创建评论:`, payload);
        const response = await apiClient.post(`/posts/${postId}/comments`, payload);

        if ([200, 201].includes(response.status)) {
          createdTotal += 1;
          console.log(`✅ 为帖子 ${postId} 创建评论 #${commentNumber} 成功`);
          // Progress line after every tenth successful comment.
          if (createdTotal % 10 === 0) {
            console.log(`📊 已创建 ${createdTotal} 条评论`);
          }
        } else {
          console.error(`❌ 为帖子 ${postId} 创建评论 #${commentNumber} 失败:`, response.data);
        }
      } catch (error) {
        console.error(`❌ 创建评论时出错:`, error);
      }

      // Short random delay between requests.
      await pause(getRandomInt(50, 200));
    }
  }

  console.log(`📊 总计创建了 ${createdTotal} 条评论`);
}
// Inserts `rows` into the given ClickHouse table in batches of at most
// `batchSize` rows, logging progress after every batch. `label` is the
// human-readable event name used in the log messages.
//
// Best-effort: an insert error is logged and aborts the remaining batches for
// this table, but is not re-thrown. Returns true when every batch succeeded
// (or there was nothing to insert), false otherwise.
async function insertEventsInBatches<T>(
  table: string,
  rows: readonly T[],
  label: string,
  batchSize = 1000,
): Promise<boolean> {
  if (rows.length === 0) {
    return true;
  }
  try {
    for (let offset = 0; offset < rows.length; offset += batchSize) {
      await clickhouse.insert({
        table,
        values: rows.slice(offset, offset + batchSize),
        format: 'JSONEachRow',
        // Timestamps are produced with Date.toISOString(); best_effort parsing
        // lets ClickHouse accept that ISO-8601 form for date-time columns.
        clickhouse_settings: { date_time_input_format: 'best_effort' },
      });
      console.log(`✅ 已插入 ${Math.min(offset + batchSize, rows.length)}/${rows.length} 条${label}`);
    }
    console.log(`✅ ${label}数据生成完成`);
    return true;
  } catch (error) {
    console.error(`插入${label}失败:`, error);
    return false;
  }
}

// Generates analytics events and inserts them directly into ClickHouse:
//   - view_events:     50-200 views per post
//   - like_events:     10-50 like/unlike events per post (about 90% 'like')
//   - follower_events: 50-200 follow/unfollow events per influencer in
//                      TEST_DATA.influencerIds (about 80% 'follow')
// All timestamps fall between START_DATE and END_DATE. Insert failures are
// logged and do not throw; a warning is printed at the end if any table failed.
async function generateAnalyticsData(postIds: string[]): Promise<void> {
  console.log('\n📈 开始生成分析数据...');
  try {
    interface ViewEvent {
      user_id: string;
      content_id: string;
      timestamp: string;
      ip: string;
      user_agent: string;
    }
    interface LikeEvent {
      user_id: string;
      content_id: string;
      timestamp: string;
      action: string;
    }
    interface FollowerEvent {
      follower_id: string;
      followed_id: string;
      timestamp: string;
      action: string;
    }

    // Hoisted out of the loops: the list never changes between iterations.
    const userAgents = [
      'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
      'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15',
      'Mozilla/5.0 (iPhone; CPU iPhone OS 16_0 like Mac OS X) AppleWebKit/605.1.15',
      'Mozilla/5.0 (Linux; Android 12) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0',
    ];

    // --- View events ---
    console.log('生成视图事件数据...');
    const viewEvents: ViewEvent[] = [];
    for (const postId of postIds) {
      const viewCount = getRandomInt(50, 200);
      for (let i = 0; i < viewCount; i++) {
        const viewDate = getRandomDate(START_DATE, END_DATE);
        const userId = uuidv4(); // a distinct simulated user per view
        const ip = `192.168.${getRandomInt(1, 255)}.${getRandomInt(1, 255)}`;
        const userAgent = userAgents[Math.floor(Math.random() * userAgents.length)];
        viewEvents.push({
          user_id: userId,
          content_id: postId,
          timestamp: viewDate.toISOString(),
          ip,
          user_agent: userAgent,
        });
      }
    }
    const viewsOk = await insertEventsInBatches('view_events', viewEvents, '视图事件');

    // --- Like events ---
    console.log('生成点赞事件数据...');
    const likeEvents: LikeEvent[] = [];
    for (const postId of postIds) {
      const likeCount = getRandomInt(10, 50);
      for (let i = 0; i < likeCount; i++) {
        const likeDate = getRandomDate(START_DATE, END_DATE);
        const userId = uuidv4();
        // Roughly 90% likes, 10% unlikes.
        const action = Math.random() > 0.1 ? 'like' : 'unlike';
        likeEvents.push({
          user_id: userId,
          content_id: postId,
          timestamp: likeDate.toISOString(),
          action,
        });
      }
    }
    const likesOk = await insertEventsInBatches('like_events', likeEvents, '点赞事件');

    // --- Follower events ---
    console.log('生成关注事件数据...');
    const followerEvents: FollowerEvent[] = [];
    for (const influencerId of TEST_DATA.influencerIds) {
      const followerCount = getRandomInt(50, 200);
      for (let i = 0; i < followerCount; i++) {
        const followDate = getRandomDate(START_DATE, END_DATE);
        const followerId = uuidv4();
        // Roughly 80% follows, 20% unfollows.
        const action = Math.random() > 0.2 ? 'follow' : 'unfollow';
        followerEvents.push({
          follower_id: followerId,
          followed_id: influencerId,
          timestamp: followDate.toISOString(),
          action,
        });
      }
    }
    const followsOk = await insertEventsInBatches('follower_events', followerEvents, '关注事件');

    if (viewsOk && likesOk && followsOk) {
      console.log('📊 所有分析数据生成完成!');
    } else {
      // Do not claim success when at least one table could not be written.
      console.warn('⚠️ 部分分析数据插入失败,请查看上方错误日志');
    }
  } catch (error) {
    console.error('❌ 生成分析数据时出错:', error);
  }
}
// Script entry point. Runs the whole pipeline in order:
//   1. log in and store the bearer token,
//   2. create fake posts,
//   3. create fake comments for the created posts,
//   4. insert analytics events into ClickHouse.
// Exits with status 1 when login fails or no post could be created, and with
// status 0 after the final step.
async function main() {
  console.log('🚀 开始生成假数据');

  // 1. Log in to obtain the bearer token.
  const token = await login();
  if (!token) {
    console.error('无法获取认证令牌,终止操作');
    process.exit(1);
  }

  // Storing the token is sufficient: the request interceptor registered on
  // apiClient at module level reads AUTH_TOKEN on every request, so no second
  // interceptor is registered here.
  AUTH_TOKEN = token;

  // 2. Create fake posts.
  const postCount = 20; // number of posts to create
  const postIds = await createFakePosts(postCount);
  if (postIds.length === 0) {
    console.warn('没有成功创建帖子,跳过后续步骤');
    process.exit(1);
  }

  // 3. Create fake comments for the posts.
  const commentsPerPost = 5; // number of comments per post
  await createFakeComments(postIds, commentsPerPost);

  // 4. Generate analytics data.
  await generateAnalyticsData(postIds);

  console.log('\n🎉 数据生成完成!');
  process.exit(0);
}
// Run the script; if main() rejects, log the reason and exit with status 1.
main().then(undefined, (reason) => {
  console.error('程序执行出错:', reason);
  process.exit(1);
});