hotkeyword
This commit is contained in:
@@ -754,6 +754,77 @@ export class AnalyticsController {
|
||||
}, 500);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Handles the hot-keywords request and returns keywords ordered by how often
 * they occur.
 *
 * Query parameters read from the request:
 * - `timeRange`: look-back window in days; must be `7`, `30` or `90`
 *   (defaults to `30`).
 * - `projectId`: optional project filter, forwarded to the service as-is.
 * - `platform`: optional platform filter, forwarded to the service as-is.
 * - `limit`: maximum number of keywords to return (defaults to `20`); must be
 *   a positive integer and is capped at 100.
 *
 * @param c Hono Context
 * @returns JSON `{ success: true, data, metadata }` on success, or
 *          `{ success: false, error }` with status 400 (invalid parameter)
 *          or 500 (unexpected failure).
 */
async getHotKeywords(c: Context) {
  // Upper bound applied to the caller-supplied `limit` so a single request
  // cannot ask for an unbounded result set.
  const MAX_LIMIT = 100;

  const requestId = `req_${Date.now()}_${Math.random().toString(36).substring(2, 10)}`;
  const startTime = Date.now();

  try {
    // Read query parameters. `||` (rather than `??`) is deliberate: an empty
    // value such as `?timeRange=` also falls back to the default.
    const timeRange = c.req.query('timeRange') || '30'; // default: 30 days
    const projectId = c.req.query('projectId'); // optional project filter
    const platform = c.req.query('platform'); // optional platform filter
    const limitParam = c.req.query('limit') || '20'; // default: 20 keywords

    logger.info(`[${requestId}] Hot keywords request received`, {
      timeRange,
      projectId,
      platform,
      limit: limitParam,
      userAgent: c.req.header('user-agent'),
      ip: c.req.header('x-forwarded-for') || 'unknown'
    });

    // Validate the time range.
    if (!['7', '30', '90'].includes(timeRange)) {
      logger.warn(`[${requestId}] Invalid timeRange: ${timeRange}`);
      return c.json({
        success: false,
        error: 'Invalid timeRange. Must be 7, 30, or 90.'
      }, 400);
    }

    // Validate the limit. Only plain digit strings are accepted, so values
    // like "abc", "-5", "1.5" or "10xyz" are rejected instead of being passed
    // on as NaN or a partially parsed number.
    const requestedLimit = /^\d+$/.test(limitParam) ? parseInt(limitParam, 10) : NaN;
    if (!Number.isSafeInteger(requestedLimit) || requestedLimit < 1) {
      logger.warn(`[${requestId}] Invalid limit: ${limitParam}`);
      return c.json({
        success: false,
        error: 'Invalid limit. Must be a positive integer.'
      }, 400);
    }
    const limit = Math.min(requestedLimit, MAX_LIMIT);

    // Ask the service for the hot keywords data.
    const hotKeywordsData = await analyticsService.getHotKeywords(
      parseInt(timeRange, 10),
      projectId,
      platform,
      limit
    );

    const response = {
      success: true,
      data: hotKeywordsData.data,
      metadata: {
        total: hotKeywordsData.total,
        // NOTE(review): hard-coded. analyticsService.getHotKeywords falls back
        // to mock data when its query fails, and in that case this flag is
        // inaccurate. The service response carries no indicator to read here.
        is_mock_data: false
      }
    };

    const duration = Date.now() - startTime;
    logger.info(`[${requestId}] Hot keywords request completed in ${duration}ms`);

    return c.json(response);
  } catch (error) {
    const duration = Date.now() - startTime;
    logger.error(`[${requestId}] Error getting hot keywords: ${error}`, {
      error: String(error),
      duration
    });

    return c.json({
      success: false,
      error: 'Failed to fetch hot keywords data'
    }, 500);
  }
}
|
||||
}
|
||||
|
||||
// Export singleton instance
|
||||
|
||||
@@ -50,4 +50,7 @@ analyticsRouter.get('/popular-posts', (c) => analyticsController.getPopularPosts
|
||||
// Moderation status distribution endpoint
analyticsRouter.get('/moderation-status', (ctx) => analyticsController.getModerationStatus(ctx));

// Hot keywords endpoint
analyticsRouter.get('/hot-keywords', (ctx) => analyticsController.getHotKeywords(ctx));

export default analyticsRouter;
|
||||
@@ -207,6 +207,24 @@ export interface ModerationStatusResponse {
|
||||
total: number;
|
||||
}
|
||||
|
||||
/**
 * One hot keyword entry: the keyword itself plus its occurrence count,
 * percentage share and sentiment score.
 */
export interface HotKeywordItem {
  /** The keyword text. */
  keyword: string;
  /** Number of occurrences. */
  count: number;
  /** Share of the total, expressed as a percentage (%). */
  percentage: number;
  /** Sentiment score. */
  sentiment_score: number;
}
|
||||
|
||||
/**
 * Payload returned for a hot keywords query.
 */
export interface HotKeywordsResponse {
  /** The hot keyword entries. */
  data: HotKeywordItem[];
  /** Total count. */
  total: number;
}
|
||||
|
||||
/**
|
||||
* Analytics service for KOL performance data
|
||||
*/
|
||||
@@ -1801,6 +1819,98 @@ export class AnalyticsService {
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Get hot keywords from comments with their frequency and sentiment scores.
 *
 * Queries the `events` table for comment events newer than `timeRange` days,
 * splits `comment_text` into tokens of two or more letters/digits, and returns
 * the most frequent tokens with their average sentiment score.
 *
 * Percentages are relative to the summed counts of the returned keywords
 * (i.e. the top `limit` rows), not to every token in the table.
 *
 * If anything in the query path throws, the error is logged and a fixed mock
 * data set is returned instead of propagating the error.
 *
 * @param timeRange Time range in days (7, 30, 90)
 * @param projectId Optional project ID to filter by
 * @param platform Optional platform to filter by
 * @param limit Maximum number of keywords to return; non-finite values fall
 *              back to 20 and values below 1 are raised to 1
 * @returns Hot keywords with their counts, percentages and sentiment scores
 */
async getHotKeywords(
  timeRange: number,
  projectId?: string,
  platform?: string,
  limit: number = 20
): Promise<HotKeywordsResponse> {
  try {
    const startDate = new Date();
    startDate.setDate(startDate.getDate() - timeRange);
    const formattedStartDate = this.formatDateForClickhouse(startDate);

    // Escape backslashes and single quotes so caller-supplied filter values
    // cannot terminate the SQL string literal they are embedded in.
    const escapeLiteral = (value: string): string =>
      value.replace(/\\/g, '\\\\').replace(/'/g, "\\'");

    // `limit` is interpolated into the query text, so force it to a positive
    // integer before use.
    const safeLimit = Number.isFinite(limit) ? Math.max(1, Math.floor(limit)) : 20;

    // All conditions go into a single WHERE clause. The non-empty-comment
    // check used to be emitted as a second `WHERE`, which made the statement
    // invalid SQL.
    const filters = [
      `event_time >= '${formattedStartDate}'`,
      `event_type = 'comment'`,
      `comment_text != ''`
    ];
    if (projectId) {
      filters.push(`project_id = '${escapeLiteral(projectId)}'`);
    }
    if (platform) {
      filters.push(`platform = '${escapeLiteral(platform)}'`);
    }
    const whereClause = `WHERE ${filters.join(' AND ')}`;

    // Query to extract keywords from comments and count their occurrences.
    // The doubled backslashes survive both the template literal and the SQL
    // string literal, so the regex engine receives `[\p{L}\p{N}]{2,}`.
    const keywordsQuery = `
      WITH extracted_keywords AS (
        SELECT
          arrayJoin(extractAll(comment_text, '[\\\\p{L}\\\\p{N}]{2,}')) AS keyword,
          sentiment_score
        FROM events
        ${whereClause}
      )
      SELECT
        keyword,
        count() AS count,
        avg(sentiment_score) AS avg_sentiment_score
      FROM extracted_keywords
      GROUP BY keyword
      ORDER BY count DESC
      LIMIT ${safeLimit}
    `;

    // Execute the query
    const keywordsData = await this.executeClickhouseQuery(keywordsQuery);

    // Coerce numeric columns defensively: depending on client/format settings
    // ClickHouse may deliver 64-bit integers as strings, and a string count
    // would turn the sum below into string concatenation.
    const rows = keywordsData.map(item => {
      const count = Number(item.count);
      const avgSentiment = Number(item.avg_sentiment_score);
      return {
        keyword: item.keyword,
        count: Number.isFinite(count) ? count : 0,
        avgSentiment: Number.isFinite(avgSentiment) ? avgSentiment : 0
      };
    });

    // Calculate total count for percentages
    const totalCount = rows.reduce((sum, row) => sum + row.count, 0);

    // Format the response
    const hotKeywords: HotKeywordItem[] = rows.map(row => ({
      keyword: row.keyword,
      count: row.count,
      // Guard against a zero total so the percentage is never NaN.
      percentage: totalCount > 0
        ? parseFloat(((row.count / totalCount) * 100).toFixed(1))
        : 0,
      sentiment_score: parseFloat(row.avgSentiment.toFixed(1))
    }));

    return {
      data: hotKeywords,
      total: totalCount
    };
  } catch (error) {
    logger.error(`Error fetching hot keywords: ${error}`);

    // Return mock data in case of error
    const mockKeywords: HotKeywordItem[] = [
      { keyword: '价格', count: 45, percentage: 22.5, sentiment_score: 0.2 },
      { keyword: '质量', count: 38, percentage: 19.0, sentiment_score: 0.7 },
      { keyword: '服务', count: 32, percentage: 16.0, sentiment_score: -0.3 },
      { keyword: '快递', count: 28, percentage: 14.0, sentiment_score: 0.1 },
      { keyword: '推荐', count: 24, percentage: 12.0, sentiment_score: 0.8 },
      { keyword: '问题', count: 18, percentage: 9.0, sentiment_score: -0.6 },
      { keyword: '体验', count: 15, percentage: 7.5, sentiment_score: 0.4 }
    ];

    return {
      data: mockKeywords,
      total: 100
    };
  }
}
|
||||
}
|
||||
|
||||
// Export singleton instance
|
||||
|
||||
@@ -3178,6 +3178,148 @@ export const openAPISpec = {
|
||||
}
|
||||
}
|
||||
},
|
||||
'/api/analytics/hot-keywords': {
|
||||
get: {
|
||||
summary: '获取热门关键词',
|
||||
description: '返回按出现频率排序的热门关键词列表,包含关键词、出现次数、占比和情感分数',
|
||||
tags: ['Analytics'],
|
||||
parameters: [
|
||||
{
|
||||
name: 'timeRange',
|
||||
in: 'query',
|
||||
description: '时间范围(天)',
|
||||
schema: {
|
||||
type: 'integer',
|
||||
enum: [7, 30, 90],
|
||||
default: 30
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'projectId',
|
||||
in: 'query',
|
||||
description: '项目ID(可选)',
|
||||
schema: {
|
||||
type: 'string'
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'platform',
|
||||
in: 'query',
|
||||
description: '平台(可选)',
|
||||
schema: {
|
||||
type: 'string',
|
||||
enum: ['weibo', 'xiaohongshu', 'douyin', 'bilibili']
|
||||
}
|
||||
},
|
||||
{
|
||||
name: 'limit',
|
||||
in: 'query',
|
||||
description: '返回关键词数量上限',
|
||||
schema: {
|
||||
type: 'integer',
|
||||
default: 20
|
||||
}
|
||||
}
|
||||
],
|
||||
responses: {
|
||||
'200': {
|
||||
description: '成功获取热门关键词',
|
||||
content: {
|
||||
'application/json': {
|
||||
schema: {
|
||||
type: 'object',
|
||||
properties: {
|
||||
success: {
|
||||
type: 'boolean',
|
||||
example: true
|
||||
},
|
||||
data: {
|
||||
type: 'array',
|
||||
items: {
|
||||
type: 'object',
|
||||
properties: {
|
||||
keyword: {
|
||||
type: 'string',
|
||||
example: '质量'
|
||||
},
|
||||
count: {
|
||||
type: 'integer',
|
||||
example: 38
|
||||
},
|
||||
percentage: {
|
||||
type: 'number',
|
||||
format: 'float',
|
||||
example: 19.0
|
||||
},
|
||||
sentiment_score: {
|
||||
type: 'number',
|
||||
format: 'float',
|
||||
example: 0.7
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
metadata: {
|
||||
type: 'object',
|
||||
properties: {
|
||||
total: {
|
||||
type: 'integer',
|
||||
example: 100
|
||||
},
|
||||
is_mock_data: {
|
||||
type: 'boolean',
|
||||
example: false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
'400': {
|
||||
description: '请求参数错误',
|
||||
content: {
|
||||
'application/json': {
|
||||
schema: {
|
||||
type: 'object',
|
||||
properties: {
|
||||
success: {
|
||||
type: 'boolean',
|
||||
example: false
|
||||
},
|
||||
error: {
|
||||
type: 'string',
|
||||
example: 'Invalid timeRange. Must be 7, 30, or 90.'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
'500': {
|
||||
description: '服务器错误',
|
||||
content: {
|
||||
'application/json': {
|
||||
schema: {
|
||||
type: 'object',
|
||||
properties: {
|
||||
success: {
|
||||
type: 'boolean',
|
||||
example: false
|
||||
},
|
||||
error: {
|
||||
type: 'string',
|
||||
example: 'Failed to fetch hot keywords data'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
},
|
||||
components: {
|
||||
schemas: {
|
||||
|
||||
Reference in New Issue
Block a user