2025-03-10 18:03:47 +08:00
parent 755fb6ac04
commit e49b3a2172
23 changed files with 802 additions and 5481 deletions

View File

@@ -15,6 +15,7 @@ import { initClickHouse } from './utils/clickhouse';
import { initWorkers } from './utils/queue';
import { initDatabase, createSampleData, checkDatabaseConnection } from './utils/initDatabase';
import { createSwaggerUI } from './swagger';
import { initScheduledTaskWorkers } from './utils/scheduledTasks';
// Create Hono app
const app = new Hono();
@@ -119,16 +120,12 @@ const startServer = async () => {
console.log('NOTICE: Database will NOT be automatically initialized on startup');
console.log('Use /api/admin/init-db endpoint to manually initialize the database if needed');
// Initialize BullMQ workers
let workers;
try {
workers = initWorkers();
console.log('BullMQ workers initialized');
} catch (error) {
console.error('Failed to initialize BullMQ workers:', error);
console.log('Background processing will not be available...');
workers = { analyticsWorker: null, notificationsWorker: null };
}
// Initialize workers for background processing
console.log('🏗️ Initializing workers...');
const workers = {
backgroundWorkers: initWorkers(),
scheduledTaskWorker: initScheduledTaskWorkers()
};
// Start server
const port = Number(config.port);
@@ -149,12 +146,16 @@ const startServer = async () => {
console.log('Shutting down server...');
// Close workers if they exist
if (workers.analyticsWorker) {
await workers.analyticsWorker.close();
if (workers.backgroundWorkers.analyticsWorker) {
await workers.backgroundWorkers.analyticsWorker.close();
}
if (workers.notificationsWorker) {
await workers.notificationsWorker.close();
if (workers.backgroundWorkers.notificationsWorker) {
await workers.backgroundWorkers.notificationsWorker.close();
}
if (workers.scheduledTaskWorker) {
await workers.scheduledTaskWorker.close();
}
process.exit(0);
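The shutdown handler now closes three worker handles one by one; as more worker types are added, a small helper keeps it flat. A minimal sketch (closeAll is illustrative, not part of this commit):

// Hypothetical helper: close every worker-like object that exposes close().
type Closable = { close(): Promise<void> };
const closeAll = async (workers: Record<string, Closable | null | undefined>) => {
await Promise.all(
Object.values(workers)
.filter((w): w is Closable => typeof w?.close === 'function')
.map((w) => w.close())
);
};
// e.g. await closeAll({ ...workers.backgroundWorkers, scheduledTaskWorker: workers.scheduledTaskWorker });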

View File

@@ -4,6 +4,12 @@ import clickhouse from '../utils/clickhouse';
import { addAnalyticsJob } from '../utils/queue';
import { getRedisClient } from '../utils/redis';
import supabase from '../utils/supabase';
import {
scheduleInfluencerCollection,
schedulePostCollection,
removeScheduledJob,
getScheduledJobs
} from '../utils/scheduledTasks';
// Define user type
interface User {
@@ -519,4 +525,357 @@ analyticsRouter.get('/project/:id/interaction-types', async (c) => {
}
});
// ===== Scheduled Collection Endpoints =====
// Schedule automated data collection for an influencer
analyticsRouter.post('/schedule/influencer', async (c) => {
try {
const { influencer_id, cron_expression } = await c.req.json();
if (!influencer_id) {
return c.json({ error: 'Influencer ID is required' }, 400);
}
// Validate that the influencer exists
const { data, error } = await supabase
.from('influencers')
.select('influencer_id')
.eq('influencer_id', influencer_id)
.single();
if (error || !data) {
return c.json({ error: 'Influencer not found' }, 404);
}
// Schedule the collection job
await scheduleInfluencerCollection(
influencer_id,
cron_expression || '0 0 * * *' // Default: Every day at midnight
);
return c.json({
message: 'Influencer metrics collection scheduled successfully',
influencer_id,
cron_expression: cron_expression || '0 0 * * *'
});
} catch (error) {
console.error('Error scheduling influencer collection:', error);
return c.json({ error: 'Internal server error' }, 500);
}
});
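For reference, a client call to this endpoint might look like the following sketch (the base URL and the /api/analytics mount path are assumptions; adjust to wherever analyticsRouter is actually mounted):

// Schedule daily metrics collection for an influencer (assumed base URL and mount path).
const res = await fetch('http://localhost:3000/api/analytics/schedule/influencer', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
influencer_id: 'inf_123', // hypothetical id
cron_expression: '0 6 * * *' // optional; defaults to '0 0 * * *' (midnight)
})
});
console.log(await res.json());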
// Schedule automated data collection for a post
analyticsRouter.post('/schedule/post', async (c) => {
try {
const { post_id, cron_expression } = await c.req.json();
if (!post_id) {
return c.json({ error: 'Post ID is required' }, 400);
}
// Validate that the post exists
const { data, error } = await supabase
.from('posts')
.select('post_id')
.eq('post_id', post_id)
.single();
if (error || !data) {
return c.json({ error: 'Post not found' }, 404);
}
// Schedule the collection job
await schedulePostCollection(
post_id,
cron_expression || '0 0 * * *' // Default: Every day at midnight
);
return c.json({
message: 'Post metrics collection scheduled successfully',
post_id,
cron_expression: cron_expression || '0 0 * * *'
});
} catch (error) {
console.error('Error scheduling post collection:', error);
return c.json({ error: 'Internal server error' }, 500);
}
});
// Get all scheduled collection jobs
analyticsRouter.get('/schedule', async (c) => {
try {
const scheduledJobs = await getScheduledJobs();
return c.json({
scheduled_jobs: scheduledJobs
});
} catch (error) {
console.error('Error fetching scheduled jobs:', error);
return c.json({ error: 'Internal server error' }, 500);
}
});
// Delete a scheduled collection job
analyticsRouter.delete('/schedule/:job_id', async (c) => {
try {
const jobId = c.req.param('job_id');
await removeScheduledJob(jobId);
return c.json({
message: 'Scheduled job removed successfully',
job_id: jobId
});
} catch (error) {
console.error('Error removing scheduled job:', error);
return c.json({ error: 'Internal server error' }, 500);
}
});
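One subtlety in the delete endpoint above: BullMQ's removeRepeatableByKey expects the `key` field of an entry returned by getRepeatableJobs(), not an ordinary job id. A client should therefore feed the `key` from the GET endpoint into the DELETE call, roughly like this (same assumed base URL and mount path as above):

// List scheduled jobs, then remove the first one by its repeatable key.
const list = await fetch('http://localhost:3000/api/analytics/schedule').then(r => r.json());
const firstKey = list.scheduled_jobs[0]?.key; // BullMQ repeatable-job entries carry a `key` field
if (firstKey) {
await fetch(`http://localhost:3000/api/analytics/schedule/${encodeURIComponent(firstKey)}`, {
method: 'DELETE'
});
}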
// ===== Data Export Endpoints =====
// Export influencer growth data (CSV format)
analyticsRouter.get('/export/influencer/:id/growth', async (c) => {
try {
const influencerId = c.req.param('id');
const {
metric = 'followers_count',
timeframe = '6months',
interval = 'month'
} = c.req.query();
// Same logic as the influencer growth endpoint, but returns the result as CSV
// Validate parameters
const validMetrics = ['followers_count', 'video_count', 'views_count', 'likes_count'];
if (!validMetrics.includes(metric)) {
return c.json({ error: 'Invalid metric specified' }, 400);
}
// Determine time range and interval function
let timeRangeSql: string;
let intervalFunction: string;
switch (timeframe) {
case '30days':
timeRangeSql = 'timestamp >= subtractDays(now(), 30)';
break;
case '90days':
timeRangeSql = 'timestamp >= subtractDays(now(), 90)';
break;
case '1year':
timeRangeSql = 'timestamp >= subtractYears(now(), 1)';
break;
case '6months':
default:
timeRangeSql = 'timestamp >= subtractMonths(now(), 6)';
break;
}
switch (interval) {
case 'day':
intervalFunction = 'toDate(timestamp)';
break;
case 'week':
intervalFunction = 'toStartOfWeek(timestamp)';
break;
case 'month':
default:
intervalFunction = 'toStartOfMonth(timestamp)';
break;
}
// Query ClickHouse for data
const result = await clickhouse.query({
query: `
SELECT
${intervalFunction} AS time_period,
sumIf(metric_value, metric_name = ?) AS change,
maxIf(metric_total, metric_name = ?) AS total_value
FROM promote.events
WHERE
influencer_id = ? AND
event_type = ? AND
${timeRangeSql}
GROUP BY time_period
ORDER BY time_period ASC
`,
values: [
metric,
metric,
influencerId,
`${metric}_change`
]
});
// Get influencer info
const { data: influencer } = await supabase
.from('influencers')
.select('name, platform')
.eq('influencer_id', influencerId)
.single();
// Extract trend data
const trendData = 'rows' in result ? result.rows : [];
// Format as CSV
const csvHeader = `Time Period,Change,Total Value\n`;
const csvRows = trendData.map((row: any) =>
`${row.time_period},${row.change},${row.total_value}`
).join('\n');
const influencerInfo = influencer
? `Influencer: ${influencer.name} (${influencer.platform})\nMetric: ${metric}\nTimeframe: ${timeframe}\nInterval: ${interval}\n\n`
: '';
const csvContent = influencerInfo + csvHeader + csvRows;
return c.body(csvContent, {
headers: {
'Content-Type': 'text/csv',
'Content-Disposition': `attachment; filename="influencer_growth_${influencerId}.csv"`
}
});
} catch (error) {
console.error('Error exporting influencer growth data:', error);
return c.json({ error: 'Internal server error' }, 500);
}
});
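The CSV rows above interpolate raw values, so a comma or quote in an influencer name would shift columns in the export. A small escaping helper would harden both export endpoints; a sketch (csvEscape is illustrative, not part of this commit):

// Quote a CSV field when it contains a comma, quote, or newline (RFC 4180 style).
const csvEscape = (value: unknown): string => {
const s = String(value ?? '');
return /[",\n]/.test(s) ? `"${s.replace(/"/g, '""')}"` : s;
};
// Usage: `${csvEscape(row.time_period)},${csvEscape(row.change)},${csvEscape(row.total_value)}`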
// Export project performance data (CSV format)
analyticsRouter.get('/export/project/:id/performance', async (c) => {
try {
const projectId = c.req.param('id');
const { timeframe = '30days' } = c.req.query();
// Get project information
const { data: project, error: projectError } = await supabase
.from('projects')
.select('id, name, description')
.eq('id', projectId)
.single();
if (projectError) {
return c.json({ error: 'Project not found' }, 404);
}
// Get project influencers
const { data: projectInfluencers, error: influencersError } = await supabase
.from('project_influencers')
.select('influencer_id')
.eq('project_id', projectId);
if (influencersError) {
console.error('Error fetching project influencers:', influencersError);
return c.json({ error: 'Failed to fetch project data' }, 500);
}
const influencerIds = projectInfluencers.map(pi => pi.influencer_id);
if (influencerIds.length === 0) {
const emptyCSV = `Project: ${project.name}\nNo influencers found in this project.`;
return c.body(emptyCSV, {
headers: {
'Content-Type': 'text/csv',
'Content-Disposition': `attachment; filename="project_performance_${projectId}.csv"`
}
});
}
// Determine time range
let startDate: Date;
const endDate = new Date();
switch (timeframe) {
case '7days':
startDate = new Date(endDate);
startDate.setDate(endDate.getDate() - 7);
break;
case '90days':
startDate = new Date(endDate);
startDate.setDate(endDate.getDate() - 90);
break;
case '6months':
startDate = new Date(endDate);
startDate.setMonth(endDate.getMonth() - 6);
break;
case '30days':
default:
startDate = new Date(endDate);
startDate.setDate(endDate.getDate() - 30);
break;
}
// Get influencer details
const { data: influencersData } = await supabase
.from('influencers')
.select('influencer_id, name, platform, followers_count')
.in('influencer_id', influencerIds);
// Get metrics from ClickHouse
const metricsResult = await clickhouse.query({
query: `
SELECT
influencer_id,
sumIf(metric_value, event_type = 'followers_count_change') AS followers_change,
sumIf(metric_value, event_type = 'post_views_count_change') AS views_change,
sumIf(metric_value, event_type = 'post_likes_count_change') AS likes_change
FROM promote.events
WHERE
influencer_id IN (?) AND
timestamp >= ? AND
timestamp <= ?
GROUP BY influencer_id
`,
values: [
influencerIds,
startDate.toISOString(),
endDate.toISOString()
]
});
// Extract metrics data
const metricsData = 'rows' in metricsResult ? metricsResult.rows : [];
// Combine data
const reportData = (influencersData || []).map(influencer => {
const metrics = metricsData.find((m: any) => m.influencer_id === influencer.influencer_id) || {
followers_change: 0,
views_change: 0,
likes_change: 0
};
return {
influencer_id: influencer.influencer_id,
name: influencer.name,
platform: influencer.platform,
followers_count: influencer.followers_count,
followers_change: metrics.followers_change || 0,
views_change: metrics.views_change || 0,
likes_change: metrics.likes_change || 0
};
});
// Format as CSV
const csvHeader = `Influencer Name,Platform,Followers Count,Followers Change,Views Change,Likes Change\n`;
const csvRows = reportData.map(row =>
`${row.name},${row.platform},${row.followers_count},${row.followers_change},${row.views_change},${row.likes_change}`
).join('\n');
const projectInfo = `Project: ${project.name}\nDescription: ${project.description || 'N/A'}\nTimeframe: ${timeframe}\nExport Date: ${new Date().toISOString()}\n\n`;
const csvContent = projectInfo + csvHeader + csvRows;
return c.body(csvContent, {
headers: {
'Content-Type': 'text/csv',
'Content-Disposition': `attachment; filename="project_performance_${projectId}.csv"`
}
});
} catch (error) {
console.error('Error exporting project performance data:', error);
return c.json({ error: 'Internal server error' }, 500);
}
});
export default analyticsRouter;

View File

@@ -46,6 +46,28 @@ export const initClickHouseTables = async () => {
try {
console.log('Initializing ClickHouse tables...');
// Create events table for general analytics tracking
await clickhouse.query({
query: `
CREATE TABLE IF NOT EXISTS promote.events (
event_id UUID DEFAULT generateUUIDv4(),
event_type String,
influencer_id String DEFAULT '',
post_id String DEFAULT '',
project_id String DEFAULT '',
timestamp DateTime64(3) DEFAULT now64(3),
metric_name String DEFAULT '',
metric_value Int64 DEFAULT 0,
metric_total Int64 DEFAULT 0,
recorded_by String DEFAULT '',
extra_data String DEFAULT ''
)
ENGINE = MergeTree()
ORDER BY (event_type, influencer_id, post_id, timestamp)
`
});
console.log(' - Created events table');
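The ORDER BY (event_type, influencer_id, post_id, timestamp) sorting key matches how the analytics endpoints read this table: they filter on an event type and an entity id first, then scan a time range. A representative read, assuming the ?-placeholder binding this commit uses with the utils/clickhouse wrapper elsewhere (values are hypothetical):

// Representative read path: the MergeTree sorting key lets ClickHouse narrow
// by (event_type, influencer_id) before scanning the requested time range.
const result = await clickhouse.query({
query: `
SELECT toStartOfMonth(timestamp) AS time_period,
sumIf(metric_value, metric_name = ?) AS change
FROM promote.events
WHERE influencer_id = ? AND event_type = ? AND timestamp >= subtractMonths(now(), 6)
GROUP BY time_period
ORDER BY time_period ASC
`,
values: ['followers_count', 'inf_123', 'followers_count_change']
});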
// Create events table
await clickhouse.query({
query: `

View File

@@ -0,0 +1,406 @@
import { Queue, Worker } from 'bullmq';
import supabase from './supabase';
import clickhouse from './clickhouse';
import { getRedisClient } from './redis';
interface ScheduledCollectionData {
type: 'influencer_metrics' | 'post_metrics';
influencer_id?: string;
post_id?: string;
project_id?: string;
scheduled_at?: string; // set when the job is enqueued below
}
// Unused placeholder kept for compatibility: BullMQ v1 required a QueueScheduler
// for delayed/repeatable jobs, but BullMQ v2+ no longer exports (or needs) one,
// so this class is never instantiated here.
class MockQueueScheduler {
constructor(queueName: string, options: any) {
console.log(`Creating mock scheduler for queue: ${queueName}`);
}
}
// Create (or reuse) the scheduled collection queue.
// A single Queue instance is cached so that every schedule/list/remove call
// does not open a fresh Redis connection.
let scheduledCollectionQueue: Queue | null = null;
const createScheduledTaskQueue = async () => {
if (scheduledCollectionQueue) {
return scheduledCollectionQueue;
}
const connection = {
host: process.env.BULL_REDIS_HOST || 'localhost',
port: parseInt(process.env.BULL_REDIS_PORT || '6379'),
password: process.env.BULL_REDIS_PASSWORD || '',
};
const queueOptions = {
connection,
defaultJobOptions: {
attempts: 3,
backoff: {
type: 'exponential',
delay: 5000,
},
},
};
// Create queue
scheduledCollectionQueue = new Queue('scheduled-data-collection', queueOptions);
// Note: since BullMQ v2, delayed/repeatable jobs are handled by the Queue
// and Worker directly; no separate QueueScheduler is needed.
return scheduledCollectionQueue;
};
// Initialize scheduled collection workers
export const initScheduledTaskWorkers = () => {
const worker = new Worker(
'scheduled-data-collection',
async (job) => {
console.log(`Processing scheduled task: ${job.id}`, job.data);
const { type, influencer_id, post_id, project_id } = job.data as ScheduledCollectionData;
try {
if (type === 'influencer_metrics') {
await collectInfluencerMetrics(influencer_id);
} else if (type === 'post_metrics') {
await collectPostMetrics(post_id);
}
console.log(`Successfully completed scheduled task: ${job.id}`);
return { success: true, timestamp: new Date().toISOString() };
} catch (error) {
console.error(`Error processing scheduled task ${job.id}:`, error);
throw error;
}
},
{
connection: {
host: process.env.BULL_REDIS_HOST || 'localhost',
port: parseInt(process.env.BULL_REDIS_PORT || '6379'),
password: process.env.BULL_REDIS_PASSWORD || '',
},
concurrency: 5,
}
);
worker.on('completed', job => {
console.log(`Scheduled task completed: ${job.id}`);
});
worker.on('failed', (job, err) => {
console.error(`Scheduled task failed: ${job?.id}`, err);
});
return worker;
};
// Schedule data collection jobs
export const scheduleInfluencerCollection = async (
influencerId: string,
cronExpression: string = '0 0 * * *' // Default: Every day at midnight
) => {
const queue = await createScheduledTaskQueue();
await queue.add(
`influencer-collection-${influencerId}`,
{
type: 'influencer_metrics',
influencer_id: influencerId,
scheduled_at: new Date().toISOString()
},
{
jobId: `influencer-${influencerId}-${Date.now()}`,
repeat: {
pattern: cronExpression
}
}
);
return true;
};
export const schedulePostCollection = async (
postId: string,
cronExpression: string = '0 0 * * *' // Default: Every day at midnight
) => {
const queue = await createScheduledTaskQueue();
await queue.add(
`post-collection-${postId}`,
{
type: 'post_metrics',
post_id: postId,
scheduled_at: new Date().toISOString()
},
{
jobId: `post-${postId}-${Date.now()}`,
repeat: {
pattern: cronExpression
}
}
);
return true;
};
// Remove a scheduled job.
// Note: removeRepeatableByKey expects the `key` field of an entry returned
// by getRepeatableJobs(), not an ordinary job id.
export const removeScheduledJob = async (jobId: string) => {
const queue = await createScheduledTaskQueue();
await queue.removeRepeatableByKey(jobId);
return true;
};
// Get all scheduled jobs
export const getScheduledJobs = async () => {
const queue = await createScheduledTaskQueue();
const repeatableJobs = await queue.getRepeatableJobs();
return repeatableJobs;
};
// Implementation of collection functions
// These functions would typically call APIs or scrape data from platforms
async function collectInfluencerMetrics(influencerId?: string) {
if (!influencerId) {
throw new Error('Influencer ID is required');
}
// Get influencer data from Supabase
const { data: influencer, error } = await supabase
.from('influencers')
.select('influencer_id, name, platform, external_id')
.eq('influencer_id', influencerId)
.single();
if (error || !influencer) {
throw new Error(`Failed to find influencer: ${error?.message}`);
}
// Here you would integrate with platform APIs to get updated metrics
// This is a placeholder that would be replaced with actual API calls
// Simulate collecting metrics (in a real scenario, this would come from APIs)
const simulatedMetrics = {
followers_count: Math.floor(50000 + Math.random() * 1000),
video_count: Math.floor(100 + Math.random() * 5),
views_count: Math.floor(1000000 + Math.random() * 50000),
likes_count: Math.floor(500000 + Math.random() * 20000)
};
// Record the metrics in both Supabase and ClickHouse
// Get the current metrics to calculate changes
const { data: currentMetrics, error: metricsError } = await supabase
.from('influencers')
.select('followers_count, video_count')
.eq('influencer_id', influencerId)
.single();
if (metricsError) {
throw new Error(`Failed to get current metrics: ${metricsError.message}`);
}
// Calculate changes
const followerChange = (simulatedMetrics.followers_count || 0) - (currentMetrics?.followers_count || 0);
const videoChange = (simulatedMetrics.video_count || 0) - (currentMetrics?.video_count || 0);
// Update Supabase
const { error: updateError } = await supabase
.from('influencers')
.update(simulatedMetrics)
.eq('influencer_id', influencerId);
if (updateError) {
throw new Error(`Failed to update influencer metrics: ${updateError.message}`);
}
// Record events in ClickHouse
const timestamp = new Date().toISOString();
const eventPromises = [];
if (followerChange !== 0) {
eventPromises.push(
clickhouse.query({
query: `
INSERT INTO promote.events (
event_type,
influencer_id,
timestamp,
metric_name,
metric_value,
metric_total,
recorded_by
) VALUES (?, ?, ?, ?, ?, ?, ?)
`,
values: [
'followers_count_change',
influencerId,
timestamp,
'followers_count',
followerChange,
simulatedMetrics.followers_count,
'system' // Recorded by the system scheduler
]
})
);
}
if (videoChange !== 0) {
eventPromises.push(
clickhouse.query({
query: `
INSERT INTO promote.events (
event_type,
influencer_id,
timestamp,
metric_name,
metric_value,
metric_total,
recorded_by
) VALUES (?, ?, ?, ?, ?, ?, ?)
`,
values: [
'video_count_change',
influencerId,
timestamp,
'video_count',
videoChange,
simulatedMetrics.video_count,
'system' // Recorded by the system scheduler
]
})
);
}
await Promise.all(eventPromises);
return {
influencer_id: influencerId,
timestamp,
metrics: simulatedMetrics,
changes: {
followers: followerChange,
videos: videoChange
}
};
}
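As the comments note, simulatedMetrics stands in for real platform integrations. A live implementation might replace it with something like the following sketch (the API host, response shape, and PLATFORM_API_TOKEN env var are all hypothetical):

// Hypothetical replacement for simulatedMetrics: fetch live stats from a platform API.
async function fetchPlatformMetrics(platform: string, externalId: string) {
const res = await fetch(
`https://api.example-platform.com/v1/${platform}/channels/${externalId}/stats`, // hypothetical URL
{ headers: { Authorization: `Bearer ${process.env.PLATFORM_API_TOKEN}` } } // hypothetical token
);
if (!res.ok) {
throw new Error(`Platform API request failed: ${res.status}`);
}
const stats = await res.json(); // assumed shape: { followers, videos, views, likes }
return {
followers_count: stats.followers,
video_count: stats.videos,
views_count: stats.views,
likes_count: stats.likes
};
}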
async function collectPostMetrics(postId?: string) {
if (!postId) {
throw new Error('Post ID is required');
}
// Get post data from Supabase
const { data: post, error } = await supabase
.from('posts')
.select('post_id, influencer_id, platform, post_url, title')
.eq('post_id', postId)
.single();
if (error || !post) {
throw new Error(`Failed to find post: ${error?.message}`);
}
// Here you would integrate with platform APIs to get updated metrics
// This is a placeholder that would be replaced with actual API calls
// Simulate collecting metrics (in a real scenario, this would come from APIs)
const simulatedMetrics = {
views_count: Math.floor(10000 + Math.random() * 5000),
likes_count: Math.floor(5000 + Math.random() * 1000),
comments_count: Math.floor(200 + Math.random() * 50),
shares_count: Math.floor(100 + Math.random() * 20)
};
// Get the current metrics to calculate changes
const { data: currentMetrics, error: metricsError } = await supabase
.from('posts')
.select('views_count, likes_count, comments_count, shares_count')
.eq('post_id', postId)
.single();
if (metricsError) {
throw new Error(`Failed to get current metrics: ${metricsError.message}`);
}
// Calculate changes
const viewsChange = (simulatedMetrics.views_count || 0) - (currentMetrics?.views_count || 0);
const likesChange = (simulatedMetrics.likes_count || 0) - (currentMetrics?.likes_count || 0);
const commentsChange = (simulatedMetrics.comments_count || 0) - (currentMetrics?.comments_count || 0);
const sharesChange = (simulatedMetrics.shares_count || 0) - (currentMetrics?.shares_count || 0);
// Update Supabase
const { error: updateError } = await supabase
.from('posts')
.update(simulatedMetrics)
.eq('post_id', postId);
if (updateError) {
throw new Error(`Failed to update post metrics: ${updateError.message}`);
}
// Record events in ClickHouse
const timestamp = new Date().toISOString();
const eventPromises = [];
// Only record changes if they are non-zero
interface MetricChanges {
views: number;
likes: number;
comments: number;
shares: number;
}
const changes: MetricChanges = {
views: viewsChange,
likes: likesChange,
comments: commentsChange,
shares: sharesChange
};
const metricsMap = {
views: simulatedMetrics.views_count,
likes: simulatedMetrics.likes_count,
comments: simulatedMetrics.comments_count,
shares: simulatedMetrics.shares_count
};
for (const [key, value] of Object.entries(changes)) {
if (value !== 0) {
const metricName = `${key}_count`;
const metricTotal = metricsMap[key as keyof typeof metricsMap];
eventPromises.push(
clickhouse.query({
query: `
INSERT INTO promote.events (
event_type,
post_id,
influencer_id,
timestamp,
metric_name,
metric_value,
metric_total,
recorded_by
) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
`,
values: [
`post_${metricName}_change`,
postId,
post.influencer_id,
timestamp,
metricName,
value,
metricTotal,
'system' // Recorded by the system scheduler
]
})
);
}
}
await Promise.all(eventPromises);
return {
post_id: postId,
timestamp,
metrics: simulatedMetrics,
changes
};
}
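To exercise this pipeline without waiting for a cron tick, a one-off job can be enqueued on the same queue name; the worker above processes it like any scheduled run. A test sketch (the influencer id is hypothetical):

// One-off test run: enqueue a single collection job with no repeat option.
import { Queue } from 'bullmq';
const queue = new Queue('scheduled-data-collection', {
connection: {
host: process.env.BULL_REDIS_HOST || 'localhost',
port: parseInt(process.env.BULL_REDIS_PORT || '6379'),
password: process.env.BULL_REDIS_PASSWORD || '',
},
});
await queue.add('manual-influencer-collection', {
type: 'influencer_metrics',
influencer_id: 'inf_123' // hypothetical id
});
await queue.close();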