hotkeyword

This commit is contained in:
2025-03-14 22:02:10 +08:00
parent 942fb592b5
commit bd1a5ce384
5 changed files with 496 additions and 0 deletions

View File

@@ -754,6 +754,77 @@ export class AnalyticsController {
}, 500);
}
}
/**
 * Handles the hot-keywords analytics request.
 *
 * Reads the `timeRange`, `projectId`, `platform` and `limit` query parameters,
 * validates `timeRange` and `limit`, delegates to
 * `analyticsService.getHotKeywords`, and wraps the result in the
 * `{ success, data, metadata }` envelope.
 *
 * @param c Hono Context
 * @returns A JSON response: 200 with the keyword list, 400 when `timeRange`
 *          or `limit` is invalid, 500 when the service call throws.
 */
async getHotKeywords(c: Context) {
  // Fallback used when the caller does not supply `limit`.
  const DEFAULT_LIMIT = 20;
  // Upper bound on the number of rows a single request may ask for, so an
  // arbitrary client-supplied value is never forwarded into the query.
  const MAX_LIMIT = 1000;

  const requestId = `req_${Date.now()}_${Math.random().toString(36).substring(2, 10)}`;
  const startTime = Date.now();
  try {
    // Read query parameters.
    const timeRange = c.req.query('timeRange') || '30'; // defaults to 30 days
    const projectId = c.req.query('projectId'); // optional project filter
    const platform = c.req.query('platform'); // optional platform filter
    const rawLimit = c.req.query('limit');
    const requestedLimit = parseInt(rawLimit || String(DEFAULT_LIMIT), 10);

    logger.info(`[${requestId}] Hot keywords request received`, {
      timeRange,
      projectId,
      platform,
      limit: requestedLimit,
      userAgent: c.req.header('user-agent'),
      ip: c.req.header('x-forwarded-for') || 'unknown'
    });

    // Validate the time range.
    if (!['7', '30', '90'].includes(timeRange)) {
      logger.warn(`[${requestId}] Invalid timeRange: ${timeRange}`);
      return c.json({
        success: false,
        error: 'Invalid timeRange. Must be 7, 30, or 90.'
      }, 400);
    }

    // Validate the limit: parseInt yields NaN for non-numeric input, and
    // zero/negative values are meaningless as a row count.
    if (!Number.isInteger(requestedLimit) || requestedLimit < 1) {
      logger.warn(`[${requestId}] Invalid limit: ${rawLimit}`);
      return c.json({
        success: false,
        error: 'Invalid limit. Must be a positive integer.'
      }, 400);
    }
    const limit = Math.min(requestedLimit, MAX_LIMIT);

    // Ask the service for the hot keyword data.
    const hotKeywordsData = await analyticsService.getHotKeywords(
      parseInt(timeRange, 10),
      projectId,
      platform,
      limit
    );

    const response = {
      success: true,
      data: hotKeywordsData.data,
      metadata: {
        total: hotKeywordsData.total,
        // NOTE(review): this flag is hard-coded, but the service falls back
        // to mock keywords when its query fails and HotKeywordsResponse has
        // no field to signal that — consider surfacing it from the service.
        is_mock_data: false
      }
    };

    const duration = Date.now() - startTime;
    logger.info(`[${requestId}] Hot keywords request completed in ${duration}ms`);
    return c.json(response);
  } catch (error) {
    const duration = Date.now() - startTime;
    logger.error(`[${requestId}] Error getting hot keywords: ${error}`, {
      error: String(error),
      duration
    });
    return c.json({
      success: false,
      error: 'Failed to fetch hot keywords data'
    }, 500);
  }
}
}
// Export singleton instance

View File

@@ -50,4 +50,7 @@ analyticsRouter.get('/popular-posts', (c) => analyticsController.getPopularPosts
// GET /moderation-status — delegates to analyticsController.getModerationStatus.
// The arrow wrapper invokes the handler as a method on the controller instance.
analyticsRouter.get('/moderation-status', (c) => analyticsController.getModerationStatus(c));
// GET /hot-keywords — delegates to analyticsController.getHotKeywords.
analyticsRouter.get('/hot-keywords', (c) => analyticsController.getHotKeywords(c));
// The configured router is this module's default export.
export default analyticsRouter;

View File

@@ -207,6 +207,24 @@ export interface ModerationStatusResponse {
total: number;
}
/**
 * A single hot keyword entry: the keyword itself plus its occurrence count,
 * its share of the total as a percentage, and a sentiment score.
 */
export interface HotKeywordItem {
keyword: string; // The keyword text
count: number; // Number of occurrences
percentage: number; // Share of the total, in percent (%)
sentiment_score: number; // Sentiment score for the keyword
}
/**
 * Response structure for hot keywords: the keyword entries together with an
 * overall total.
 */
export interface HotKeywordsResponse {
data: HotKeywordItem[]; // Hot keyword entries
total: number; // Overall total
}
/**
* Analytics service for KOL performance data
*/
@@ -1801,6 +1819,98 @@ export class AnalyticsService {
};
}
}
/**
 * Get hot keywords from comments with their frequency and sentiment scores.
 *
 * Keywords are runs of two or more Unicode letters/digits extracted from
 * `comment_text` of `comment` events newer than `timeRange` days. `total` is
 * the sum of the counts of the returned keywords, and each `percentage` is
 * relative to that sum (i.e. to the returned top-N, not to every keyword).
 *
 * If the query fails for any reason the error is logged and a fixed set of
 * mock keywords is returned instead of throwing.
 *
 * @param timeRange Time range in days (7, 30, 90)
 * @param projectId Optional project ID to filter by
 * @param platform Optional platform to filter by
 * @param limit Maximum number of keywords to return; non-finite or
 *              non-positive values fall back to 20
 * @returns Hot keywords with their counts, percentages and sentiment scores
 */
async getHotKeywords(
  timeRange: number,
  projectId?: string,
  platform?: string,
  limit: number = 20
): Promise<HotKeywordsResponse> {
  try {
    // Render a caller-supplied value as a single-quoted SQL string literal,
    // escaping backslashes and quotes so it cannot break out of the literal.
    const quoteLiteral = (value: string): string =>
      `'${value.replace(/\\/g, '\\\\').replace(/'/g, "\\'")}'`;

    // Only a finite positive integer may be spliced into LIMIT.
    const safeLimit = Number.isFinite(limit) && limit >= 1 ? Math.floor(limit) : 20;

    const startDate = new Date();
    startDate.setDate(startDate.getDate() - timeRange);
    const formattedStartDate = this.formatDateForClickhouse(startDate);

    // Build a single WHERE clause. The non-empty comment condition lives
    // here too: emitting it as a second WHERE made the statement invalid.
    const filters = [
      `event_time >= '${formattedStartDate}'`,
      `event_type = 'comment'`,
      `comment_text != ''`
    ];
    if (projectId) {
      filters.push(`project_id = ${quoteLiteral(projectId)}`);
    }
    if (platform) {
      filters.push(`platform = ${quoteLiteral(platform)}`);
    }
    const whereClause = `WHERE ${filters.join(' AND ')}`;

    // Query to extract keywords from comments and count their occurrences
    const keywordsQuery = `
      WITH extracted_keywords AS (
        SELECT
          arrayJoin(extractAll(comment_text, '[\\\\p{L}\\\\p{N}]{2,}')) AS keyword,
          sentiment_score
        FROM events
        ${whereClause}
      )
      SELECT
        keyword,
        count() AS count,
        avg(sentiment_score) AS avg_sentiment_score
      FROM extracted_keywords
      GROUP BY keyword
      ORDER BY count DESC
      LIMIT ${safeLimit}
    `;

    // Execute the query
    const keywordsData = await this.executeClickhouseQuery(keywordsQuery);

    // Coerce to numbers up front: 64-bit counts may arrive as strings
    // (which would turn the sum below into string concatenation) and the
    // average may be null/NaN when no sentiment values are present.
    const rows = keywordsData.map(item => {
      const count = Number(item.count);
      const avgSentiment = Number(item.avg_sentiment_score);
      return {
        keyword: String(item.keyword),
        count: Number.isFinite(count) ? count : 0,
        avgSentiment: Number.isFinite(avgSentiment) ? avgSentiment : 0
      };
    });

    // Calculate total count for percentages
    const totalCount = rows.reduce((sum, row) => sum + row.count, 0);

    // Format the response, rounding to one decimal place
    const hotKeywords: HotKeywordItem[] = rows.map(row => ({
      keyword: row.keyword,
      count: row.count,
      percentage: totalCount > 0
        ? parseFloat(((row.count / totalCount) * 100).toFixed(1))
        : 0,
      sentiment_score: parseFloat(row.avgSentiment.toFixed(1))
    }));

    return {
      data: hotKeywords,
      total: totalCount
    };
  } catch (error) {
    logger.error(`Error fetching hot keywords: ${error}`);
    // Return mock data in case of error
    const mockKeywords: HotKeywordItem[] = [
      { keyword: '价格', count: 45, percentage: 22.5, sentiment_score: 0.2 },
      { keyword: '质量', count: 38, percentage: 19.0, sentiment_score: 0.7 },
      { keyword: '服务', count: 32, percentage: 16.0, sentiment_score: -0.3 },
      { keyword: '快递', count: 28, percentage: 14.0, sentiment_score: 0.1 },
      { keyword: '推荐', count: 24, percentage: 12.0, sentiment_score: 0.8 },
      { keyword: '问题', count: 18, percentage: 9.0, sentiment_score: -0.6 },
      { keyword: '体验', count: 15, percentage: 7.5, sentiment_score: 0.4 }
    ];
    return {
      data: mockKeywords,
      total: 100
    };
  }
}
}
// Export singleton instance

View File

@@ -3178,6 +3178,148 @@ export const openAPISpec = {
}
}
},
// OpenAPI description of the hot-keywords endpoint (GET only).
'/api/analytics/hot-keywords': {
get: {
summary: '获取热门关键词',
description: '返回按出现频率排序的热门关键词列表,包含关键词、出现次数、占比和情感分数',
tags: ['Analytics'],
// Query parameters; none of them is marked as required.
parameters: [
{
// Look-back window in days.
name: 'timeRange',
in: 'query',
description: '时间范围(天)',
schema: {
type: 'integer',
enum: [7, 30, 90],
default: 30
}
},
{
// Optional project filter.
name: 'projectId',
in: 'query',
description: '项目ID可选',
schema: {
type: 'string'
}
},
{
// Optional platform filter.
// NOTE(review): confirm the handler actually restricts values to this enum.
name: 'platform',
in: 'query',
description: '平台(可选)',
schema: {
type: 'string',
enum: ['weibo', 'xiaohongshu', 'douyin', 'bilibili']
}
},
{
// Maximum number of keywords to return.
name: 'limit',
in: 'query',
description: '返回关键词数量上限',
schema: {
type: 'integer',
default: 20
}
}
],
responses: {
// Success envelope: { success, data: [...], metadata: { total, is_mock_data } }.
'200': {
description: '成功获取热门关键词',
content: {
'application/json': {
schema: {
type: 'object',
properties: {
success: {
type: 'boolean',
example: true
},
// One entry per keyword.
data: {
type: 'array',
items: {
type: 'object',
properties: {
keyword: {
type: 'string',
example: '质量'
},
count: {
type: 'integer',
example: 38
},
percentage: {
type: 'number',
format: 'float',
example: 19.0
},
sentiment_score: {
type: 'number',
format: 'float',
example: 0.7
}
}
}
},
// Summary information accompanying the keyword list.
metadata: {
type: 'object',
properties: {
total: {
type: 'integer',
example: 100
},
is_mock_data: {
type: 'boolean',
example: false
}
}
}
}
}
}
}
},
// Error envelope for invalid request parameters: { success: false, error }.
'400': {
description: '请求参数错误',
content: {
'application/json': {
schema: {
type: 'object',
properties: {
success: {
type: 'boolean',
example: false
},
error: {
type: 'string',
example: 'Invalid timeRange. Must be 7, 30, or 90.'
}
}
}
}
}
},
// Error envelope for server-side failures: { success: false, error }.
'500': {
description: '服务器错误',
content: {
'application/json': {
schema: {
type: 'object',
properties: {
success: {
type: 'boolean',
example: false
},
error: {
type: 'string',
example: 'Failed to fetch hot keywords data'
}
}
}
}
}
}
}
}
},
},
components: {
schemas: {