From bd1a5ce384c186019023537bf8082242a7c15564 Mon Sep 17 00:00:00 2001
From: William Tso
Date: Fri, 14 Mar 2025 22:02:10 +0800
Subject: [PATCH] hotkeyword

---
 .../src/controllers/analyticsController.ts    |  93 ++++++++
 backend/src/routes/analytics.ts               |   3 +
 backend/src/services/analyticsService.ts      | 146 ++++++++++++
 backend/src/swagger/index.ts                  | 142 +++++++++++++++
 web/src/components/Analytics.tsx              | 170 ++++++++++++++++++
 5 files changed, 554 insertions(+)

diff --git a/backend/src/controllers/analyticsController.ts b/backend/src/controllers/analyticsController.ts
index 5ea2993..2d60702 100644
--- a/backend/src/controllers/analyticsController.ts
+++ b/backend/src/controllers/analyticsController.ts
@@ -754,6 +754,99 @@ export class AnalyticsController {
       }, 500);
     }
   }
+
+  /**
+   * Handle GET /hot-keywords: return the most frequent comment keywords,
+   * sorted by how often they occur.
+   *
+   * Query parameters read from the request:
+   * - timeRange: look-back window in days; must be 7, 30 or 90 (default 30)
+   * - projectId: optional project filter
+   * - platform: optional platform filter
+   * - limit: positive integer, default 20, capped at 100
+   *
+   * @param c Hono Context
+   * @returns Response with hot keywords data; 400 for invalid parameters,
+   *          500 if the service call throws
+   */
+  async getHotKeywords(c: Context) {
+    const requestId = `req_${Date.now()}_${Math.random().toString(36).substring(2, 10)}`;
+    const startTime = Date.now();
+    const maxLimit = 100; // upper bound on rows a single request may ask for
+
+    try {
+      // Read query parameters
+      const timeRange = c.req.query('timeRange') || '30'; // default: 30 days
+      const projectId = c.req.query('projectId'); // optional project filter
+      const platform = c.req.query('platform'); // optional platform filter
+      const rawLimit = c.req.query('limit') || '20'; // default: 20 keywords
+
+      logger.info(`[${requestId}] Hot keywords request received`, {
+        timeRange,
+        projectId,
+        platform,
+        limit: rawLimit,
+        userAgent: c.req.header('user-agent'),
+        ip: c.req.header('x-forwarded-for') || 'unknown'
+      });
+
+      // Validate the time range
+      if (!['7', '30', '90'].includes(timeRange)) {
+        logger.warn(`[${requestId}] Invalid timeRange: ${timeRange}`);
+        return c.json({
+          success: false,
+          error: 'Invalid timeRange. Must be 7, 30, or 90.'
+        }, 400);
+      }
+
+      // Validate the limit. It ends up in the SQL LIMIT clause, so only a plain
+      // positive integer is accepted (parseInt would let "5abc" through and turn
+      // "abc" into NaN).
+      if (!/^\d+$/.test(rawLimit) || Number(rawLimit) < 1) {
+        logger.warn(`[${requestId}] Invalid limit: ${rawLimit}`);
+        return c.json({
+          success: false,
+          error: 'Invalid limit. Must be a positive integer.'
+        }, 400);
+      }
+      const limit = Math.min(Number(rawLimit), maxLimit);
+
+      // Fetch hot keyword data from the service
+      const hotKeywordsData = await analyticsService.getHotKeywords(
+        parseInt(timeRange, 10),
+        projectId,
+        platform,
+        limit
+      );
+
+      const response = {
+        success: true,
+        data: hotKeywordsData.data,
+        metadata: {
+          total: hotKeywordsData.total,
+          // The service substitutes sample data when its query fails; report that
+          // instead of always claiming the data is real.
+          is_mock_data: hotKeywordsData.is_mock_data === true
+        }
+      };
+
+      const duration = Date.now() - startTime;
+      logger.info(`[${requestId}] Hot keywords request completed in ${duration}ms`);
+
+      return c.json(response);
+    } catch (error) {
+      const duration = Date.now() - startTime;
+      logger.error(`[${requestId}] Error getting hot keywords: ${error}`, {
+        error: String(error),
+        duration
+      });
+
+      return c.json({
+        success: false,
+        error: 'Failed to fetch hot keywords data'
+      }, 500);
+    }
+  }
 }
 
 // Export singleton instance
diff --git a/backend/src/routes/analytics.ts b/backend/src/routes/analytics.ts
index 6f11402..40a54fd 100644
--- a/backend/src/routes/analytics.ts
+++ b/backend/src/routes/analytics.ts
@@ -50,4 +50,7 @@ analyticsRouter.get('/popular-posts', (c) => analyticsController.getPopularPosts
 // Add moderation status distribution route
 analyticsRouter.get('/moderation-status', (c) => analyticsController.getModerationStatus(c));
 
+// Add hot keywords route
+analyticsRouter.get('/hot-keywords', (c) => analyticsController.getHotKeywords(c));
+
 export default analyticsRouter;
\ No newline at end of file
diff --git a/backend/src/services/analyticsService.ts b/backend/src/services/analyticsService.ts
index f2797c6..bc74653 100644
--- a/backend/src/services/analyticsService.ts
+++ b/backend/src/services/analyticsService.ts
@@ -207,6 +207,25 @@ export interface ModerationStatusResponse {
   total: number;
 }
 
+/**
+ * Represents a hot keyword item with count, percentage and sentiment score
+ */
+export interface HotKeywordItem {
+  keyword: string; // keyword text
+  count: number; // number of occurrences
+  percentage: number; // share of the total (%)
+  sentiment_score: number; // sentiment score
+}
+
+/**
+ * Response structure for hot keywords
+ */
+export interface HotKeywordsResponse {
+  data: HotKeywordItem[]; // hot keyword rows
+  total: number; // sum of the counts in `data`
+  is_mock_data?: boolean; // true when `data` is the built-in sample set
+}
+
 /**
  * Analytics service for KOL performance data
  */
@@ -1801,6 +1820,133 @@ export class AnalyticsService {
       };
     }
   }
+
+  /**
+   * Escape a value for embedding in a single-quoted ClickHouse string literal.
+   * Backslashes and single quotes are backslash-escaped so a caller-supplied
+   * filter value cannot close the literal and inject SQL.
+   *
+   * @param value Raw string value
+   * @returns The escaped value (without surrounding quotes)
+   */
+  private escapeClickhouseString(value: string): string {
+    return value.replace(/\\/g, '\\\\').replace(/'/g, "\\'");
+  }
+
+  /**
+   * Get hot keywords from comments with their frequency and sentiment scores.
+   *
+   * Percentages are relative to the summed counts of the returned keywords
+   * (the top `limit` rows), which is also what `total` reports.
+   *
+   * @param timeRange Time range in days (7, 30, 90)
+   * @param projectId Optional project ID to filter by
+   * @param platform Optional platform to filter by
+   * @param limit Maximum number of keywords to return
+   * @returns Hot keywords with their counts, percentages and sentiment scores.
+   *          If the query fails, sample data flagged with is_mock_data: true.
+   */
+  async getHotKeywords(
+    timeRange: number,
+    projectId?: string,
+    platform?: string,
+    limit: number = 20
+  ): Promise<HotKeywordsResponse> {
+    try {
+      const startDate = new Date();
+      startDate.setDate(startDate.getDate() - timeRange);
+      const formattedStartDate = this.formatDateForClickhouse(startDate);
+
+      // limit is interpolated into the query text, so force a positive integer.
+      const safeLimit = Number.isFinite(limit) && limit >= 1 ? Math.floor(limit) : 20;
+
+      // Collect every condition in one list so the query has exactly one WHERE
+      // clause (a second WHERE after the generated one is a syntax error).
+      const filters = [
+        `event_time >= '${formattedStartDate}'`,
+        `event_type = 'comment'`, // comment events only
+        `comment_text != ''`
+      ];
+      if (projectId) {
+        filters.push(`project_id = '${this.escapeClickhouseString(projectId)}'`);
+      }
+      if (platform) {
+        filters.push(`platform = '${this.escapeClickhouseString(platform)}'`);
+      }
+      const whereClause = `WHERE ${filters.join(' AND ')}`;
+
+      // Query to extract keywords from comments and count their occurrences
+      const keywordsQuery = `
+        WITH extracted_keywords AS (
+          SELECT
+            arrayJoin(extractAll(comment_text, '[\\\\p{L}\\\\p{N}]{2,}')) AS keyword,
+            sentiment_score
+          FROM events
+          ${whereClause}
+        )
+        SELECT
+          keyword,
+          count() AS count,
+          avg(sentiment_score) AS avg_sentiment_score
+        FROM extracted_keywords
+        GROUP BY keyword
+        ORDER BY count DESC
+        LIMIT ${safeLimit}
+      `;
+
+      // Execute the query
+      const keywordsData = await this.executeClickhouseQuery(keywordsQuery);
+
+      // Coerce to numbers first: depending on output settings ClickHouse can
+      // return 64-bit integers such as count() as strings, and the average may
+      // come back as null; adding strings would concatenate instead of summing.
+      const rows = keywordsData.map(item => ({
+        keyword: item.keyword,
+        count: Number(item.count) || 0,
+        sentiment: Number(item.avg_sentiment_score) || 0
+      }));
+
+      // Calculate total count for percentages
+      const totalCount = rows.reduce((sum, row) => sum + row.count, 0);
+
+      // Format the response
+      const hotKeywords: HotKeywordItem[] = rows.map(row => ({
+        keyword: row.keyword,
+        count: row.count,
+        // Guard the division so an all-zero result cannot yield NaN
+        percentage: totalCount > 0
+          ? parseFloat(((row.count / totalCount) * 100).toFixed(1))
+          : 0,
+        sentiment_score: parseFloat(row.sentiment.toFixed(1))
+      }));
+
+      return {
+        data: hotKeywords,
+        total: totalCount,
+        is_mock_data: false
+      };
+    } catch (error) {
+      logger.error(`Error fetching hot keywords: ${error}`);
+
+      // Return mock data in case of error, flagged so callers can tell it apart
+      const mockKeywords: HotKeywordItem[] = [
+        { keyword: '价格', count: 45, percentage: 22.5, sentiment_score: 0.2 },
+        { keyword: '质量', count: 38, percentage: 19.0, sentiment_score: 0.7 },
+        { keyword: '服务', count: 32, percentage: 16.0, sentiment_score: -0.3 },
+        { keyword: '快递', count: 28, percentage: 14.0, sentiment_score: 0.1 },
+        { keyword: '推荐', count: 24, percentage: 12.0, sentiment_score: 0.8 },
+        { keyword: '问题', count: 18, percentage: 9.0, sentiment_score: -0.6 },
+        { keyword: '体验', count: 15, percentage: 7.5, sentiment_score: 0.4 }
+      ];
+
+      return {
+        data: mockKeywords,
+        // Same definition as the real path: the sum of the listed counts
+        total: mockKeywords.reduce((sum, item) => sum + item.count, 0),
+        is_mock_data: true
+      };
+    }
+  }
 }
 
 // Export singleton instance
diff --git a/backend/src/swagger/index.ts b/backend/src/swagger/index.ts
index d3a5337..5f1a962 100644
--- a/backend/src/swagger/index.ts
+++ 
b/backend/src/swagger/index.ts
@@ -3178,6 +3178,148 @@ export const openAPISpec = {
         }
       }
     },
+    '/api/analytics/hot-keywords': {
+      get: {
+        summary: '获取热门关键词',
+        description: '返回按出现频率排序的热门关键词列表,包含关键词、出现次数、占比和情感分数',
+        tags: ['Analytics'],
+        parameters: [
+          {
+            name: 'timeRange',
+            in: 'query',
+            description: '时间范围(天)',
+            schema: {
+              type: 'integer',
+              enum: [7, 30, 90],
+              default: 30
+            }
+          },
+          {
+            name: 'projectId',
+            in: 'query',
+            description: '项目ID(可选)',
+            schema: {
+              type: 'string'
+            }
+          },
+          {
+            name: 'platform',
+            in: 'query',
+            description: '平台(可选)',
+            schema: {
+              type: 'string',
+              enum: ['weibo', 'xiaohongshu', 'douyin', 'bilibili']
+            }
+          },
+          {
+            name: 'limit',
+            in: 'query',
+            description: '返回关键词数量上限',
+            schema: {
+              type: 'integer',
+              default: 20
+            }
+          }
+        ],
+        responses: {
+          '200': {
+            description: '成功获取热门关键词',
+            content: {
+              'application/json': {
+                schema: {
+                  type: 'object',
+                  properties: {
+                    success: {
+                      type: 'boolean',
+                      example: true
+                    },
+                    data: {
+                      type: 'array',
+                      items: {
+                        type: 'object',
+                        properties: {
+                          keyword: {
+                            type: 'string',
+                            example: '质量'
+                          },
+                          count: {
+                            type: 'integer',
+                            example: 38
+                          },
+                          percentage: {
+                            type: 'number',
+                            format: 'float',
+                            example: 19.0
+                          },
+                          sentiment_score: {
+                            type: 'number',
+                            format: 'float',
+                            example: 0.7
+                          }
+                        }
+                      }
+                    },
+                    metadata: {
+                      type: 'object',
+                      properties: {
+                        total: {
+                          type: 'integer',
+                          example: 100
+                        },
+                        is_mock_data: {
+                          type: 'boolean',
+                          example: false
+                        }
+                      }
+                    }
+                  }
+                }
+              }
+            }
+          },
+          '400': {
+            description: '请求参数错误',
+            content: {
+              'application/json': {
+                schema: {
+                  type: 'object',
+                  properties: {
+                    success: {
+                      type: 'boolean',
+                      example: false
+                    },
+                    error: {
+                      type: 'string',
+                      example: 'Invalid timeRange. Must be 7, 30, or 90.'
+                    }
+                  }
+                }
+              }
+            }
+          },
+          '500': {
+            description: '服务器错误',
+            content: {
+              'application/json': {
+                schema: {
+                  type: 'object',
+                  properties: {
+                    success: {
+                      type: 'boolean',
+                      example: false
+                    },
+                    error: {
+                      type: 'string',
+                      example: 'Failed to fetch hot keywords data'
+                    }
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+    },
   },
   components: {
     schemas: {
diff --git a/web/src/components/Analytics.tsx b/web/src/components/Analytics.tsx
index 163225d..7964538 100644
--- a/web/src/components/Analytics.tsx
+++ b/web/src/components/Analytics.tsx
@@ -232,6 +232,21 @@ interface PopularPostsResponse {
   error?: string;
 }
 
+// Response shape of the hot keywords API
+interface HotKeywordsResponse {
+  success: boolean;
+  data: {
+    keyword: string;
+    count: number;
+    percentage: number;
+    sentiment_score: number;
+  }[];
+  metadata: {
+    total: number;
+  };
+  error?: string;
+}
+
 const Analytics: React.FC = () => {
   const [timeRange, setTimeRange] = useState('30'); // 修改默认值为'30'与API匹配
   const [selectedKOL, setSelectedKOL] = useState('all');
@@ -261,6 +276,16 @@ const Analytics: React.FC = () => {
   const [moderationLoading, setModerationLoading] = useState(true);
   const [moderationError, setModerationError] = useState(null);
 
+  // Hot keywords state: rows to render, loading flag, and last error message
+  const [hotKeywords, setHotKeywords] = useState<{
+    keyword: string;
+    count: number;
+    percentage: number;
+    sentiment_score: number;
+  }[]>([]);
+  const [keywordsLoading, setKeywordsLoading] = useState(true);
+  const [keywordsError, setKeywordsError] = useState<string | null>(null); // explicit type: a bare null would reject string messages
+
   // 添加项目相关状态
   const [projects, setProjects] = useState([
     { id: '1', name: '项目 1', description: '示例项目 1' },
@@ -502,6 +527,9 @@ const Analytics: React.FC = () => {
     // 获取审核状态分布数据
     fetchModerationStatus();
 
+    // Fetch hot keywords data
+    fetchHotKeywords();
+
     const fetchAnalyticsData = async () => {
       try {
         setLoading(true);
@@ -1449,6 +1477,7 @@ const Analytics: React.FC = () => {
     fetchSentimentAnalysis();
     fetchPopularPosts();
     fetchModerationStatus(); // 添加刷新审核状态数据
+    fetchHotKeywords(); // also refresh hot keywords
   };
 
   // 项目选择变化处理函数
@@ -1461,6 +1490,7 @@ const 
Analytics: React.FC = () => {
     fetchSentimentAnalysis();
     fetchPopularPosts();
     fetchModerationStatus(); // 添加刷新审核状态数据
+    fetchHotKeywords(); // also refresh hot keywords
   };
 
   // 平台选择变化处理函数
@@ -1473,6 +1503,77 @@ const Analytics: React.FC = () => {
     fetchSentimentAnalysis();
     fetchPopularPosts();
     fetchModerationStatus(); // 添加刷新审核状态数据
+    fetchHotKeywords(); // also refresh hot keywords
+  };
+
+  // Fetch hot keywords for the currently selected time range, project and platform
+  const fetchHotKeywords = async () => {
+    // Sample data stored alongside the error state whenever the request fails
+    const fallbackKeywords = [
+      { keyword: '价格', count: 45, percentage: 22.5, sentiment_score: 0.2 },
+      { keyword: '质量', count: 38, percentage: 19.0, sentiment_score: 0.7 },
+      { keyword: '服务', count: 32, percentage: 16.0, sentiment_score: -0.3 },
+      { keyword: '快递', count: 28, percentage: 14.0, sentiment_score: 0.1 },
+      { keyword: '推荐', count: 24, percentage: 12.0, sentiment_score: 0.8 },
+      { keyword: '问题', count: 18, percentage: 9.0, sentiment_score: -0.6 },
+      { keyword: '体验', count: 15, percentage: 7.5, sentiment_score: 0.4 }
+    ];
+
+    try {
+      setKeywordsLoading(true);
+      setKeywordsError(null);
+
+      // Build the query string with URLSearchParams so filter values are
+      // percent-encoded instead of being pasted into the URL verbatim
+      const params = new URLSearchParams({ timeRange });
+      if (selectedProject !== 'all') {
+        params.set('projectId', selectedProject);
+      }
+      if (selectedPlatform !== 'all') {
+        params.set('platform', selectedPlatform);
+      }
+      const finalUrl = `http://localhost:4000/api/analytics/hot-keywords?${params.toString()}`;
+
+      console.log('请求热门关键字数据URL:', finalUrl);
+
+      // SECURITY NOTE(review): this bearer token is hard-coded in client source and
+      // its exp claim (1741627892) predates this commit. Kept as-is to preserve
+      // behaviour; load the token from the app's auth session and revoke this one.
+      const authToken = 'eyJhbGciOiJIUzI1NiIsImtpZCI6Inl3blNGYnRBOGtBUnl4UmUiLCJ0eXAiOiJKV1QifQ.eyJpc3MiOiJodHRwczovL3h0cWhsdXpvcm5hemxta29udWNyLnN1cGFiYXNlLmNvL2F1dGgvdjEiLCJzdWIiOiI1YjQzMThiZi0yMWE4LTQ3YWMtOGJmYS0yYThmOGVmOWMwZmIiLCJhdWQiOiJhdXRoZW50aWNhdGVkIiwiZXhwIjoxNzQxNjI3ODkyLCJpYXQiOjE3NDE2MjQyOTIsImVtYWlsIjoidml0YWxpdHltYWlsZ0BnbWFpbC5jb20iLCJwaG9uZSI6IiIsImFwcF9tZXRhZGF0YSI6eyJwcm92aWRlciI6ImVtYWlsIiwicHJvdmlkZXJzIjpbImVtYWlsIl19LCJ1c2VyX21ldGFkYXRhIjp7ImVtYWlsX3ZlcmlmaWVkIjp0cnVlfSwicm9sZSI6ImF1dGhlbnRpY2F0ZWQiLCJhYWwiOiJhYWwxIiwiYW1yIjpbeyJtZXRob2QiOiJwYXNzd29yZCIsInRpbWVzdGFtcCI6MTc0MTYyNDI5Mn1dLCJzZXNzaW9uX2lkIjoiODlmYjg0YzktZmEzYy00YmVlLTk0MDQtNjI1MjE0OGIyMzVlIiwiaXNfYW5vbnltb3VzIjpmYWxzZX0.VuUX2yhqN-FZseKL8fQG91i1cohfRqW2m1Z8CIWhZuk';
+
+      const response = await fetch(finalUrl, {
+        headers: {
+          'accept': 'application/json',
+          'Authorization': `Bearer ${authToken}`
+        }
+      });
+
+      if (!response.ok) {
+        const errorText = await response.text();
+        setKeywordsError(`获取失败 (${response.status}): ${errorText}`);
+        console.error('获取热门关键字数据失败,HTTP状态:', response.status, errorText);
+        setHotKeywords(fallbackKeywords);
+        return;
+      }
+
+      const result = await response.json() as HotKeywordsResponse;
+      console.log('成功获取热门关键字数据:', result);
+
+      if (result.success) {
+        // Guard against a malformed payload: the render code calls array methods
+        setHotKeywords(Array.isArray(result.data) ? result.data : []);
+      } else {
+        setKeywordsError(result.error || '获取热门关键字数据失败');
+        console.error('API调用失败:', result.error || '未知错误');
+        setHotKeywords(fallbackKeywords);
+      }
+    } catch (error) {
+      setKeywordsError(`获取热门关键字数据时发生错误: ${error instanceof Error ? error.message : String(error)}`);
+      console.error('获取热门关键字数据时发生错误:', error);
+      setHotKeywords(fallbackKeywords);
+    } finally {
+      setKeywordsLoading(false);
+    }
   };
 
   return (
@@ -2282,6 +2383,57 @@ const Analytics: React.FC = () => {
               )}
+
+          {/* Hot keywords */}
+
+

热门关键字

+ {keywordsLoading ? ( +
+
+
+ ) : keywordsError ? ( +
+ +

{keywordsError}

+
+ ) : hotKeywords.length === 0 ? ( +
+ +

暂无热门关键字数据

+
+ ) : ( +
+ {hotKeywords.slice(0, 10).map((keyword, index) => ( +
+
+
+ + {index + 1} + + {keyword.keyword} +
+
+
+
+
+ {keyword.count}次 ({keyword.percentage.toFixed(1)}%) +
+
0 ? 'text-green-500' : keyword.sentiment_score < 0 ? 'text-red-500' : 'text-gray-500'}`}> + 情感: {keyword.sentiment_score.toFixed(1)} +
+
+
+
0.3 ? 'bg-green-500' : keyword.sentiment_score < -0.3 ? 'bg-red-500' : 'bg-yellow-500'}`} + style={{ width: `${keyword.percentage}%` }} + >
+
+
+
+ ))} +
+ )} +
)}