cleaning up db
This commit is contained in:
110
Requirements.md
Normal file
110
Requirements.md
Normal file
@@ -0,0 +1,110 @@
|
||||
# KOL表现概览
|
||||
|
||||
- 卡片式布局展示每个KOL的关键表现指标
|
||||
- 显示每个KOL的基础资料(头像、昵称、平台)
|
||||
- 突出展示三个核心指标:粉丝增长量、新增点赞数、新增关注数
|
||||
- 每个指标旁边显示环比变化百分比和趋势箭头
|
||||
- 颜色编码直观表示表现好坏(绿色增长,红色下降)
|
||||
- 支持时间范围切换(7天/30天/90天)
|
||||
- 可按表现指标排序以识别表现最佳/最差的KOL
|
||||
- 帮助团队快速评估每个KOL的营销效果
|
||||
- 点击卡片可展开查看该KOL的详细分析
|
||||
|
||||
# KOL合作转换漏斗
|
||||
|
||||
- 以漏斗图形式展示用户从初次接触到最终转化的全过程
|
||||
- 显示6个转化阶段:曝光、兴趣、考虑、意向、评估、购买
|
||||
- 每个阶段显示用户数量和占比
|
||||
- 相邻阶段间显示转化率百分比
|
||||
- 底部显示三个关键指标:平均转化率、最高转化阶段、最低转化阶段
|
||||
- 根据所选项目和时间范围自动更新数据
|
||||
|
||||
# KOL贴文表现
|
||||
|
||||
- 表格形式展示KOL发布的所有贴文数据
|
||||
- 每行显示一篇贴文的关键指标:标题、KOL、平台、发布日期
|
||||
- 包含互动指标:观看数、点赞数、留言数、分享数
|
||||
- 显示情绪指标评分
|
||||
- 支持按平台和KOL筛选
|
||||
- 可排序功能便于查找表现最佳内容
|
||||
|
||||
# 概览卡片
|
||||
|
||||
- 展示三个核心指标的卡片式布局
|
||||
- 每个卡片包含大数字显示当前值和环比增长
|
||||
- 留言总数卡片:显示所有评论数量及周环比变化
|
||||
- 平均互动率卡片:显示互动量与浏览量的百分比及周环比
|
||||
- 情感分析卡片:显示正面评论占比及周环比提升
|
||||
- 每个卡片包含对应图标和趋势指示器
|
||||
- 直观展示KOL营销效果的总体健康状况
|
||||
|
||||
# 留言趋势
|
||||
|
||||
- 柱状图形式展示一段时间内留言数量的变化
|
||||
- 横轴显示日期,纵轴显示留言数量
|
||||
- 每个柱体代表当天的留言总数
|
||||
- 悬停时显示具体留言数量
|
||||
- 自动计算最大值设置合适的比例尺
|
||||
- 使用蓝色渐变效果提高视觉吸引力
|
||||
- 帮助团队了解用户互动的时间规律
|
||||
|
||||
# 平台分布
|
||||
|
||||
- 横向条形图展示不同社交平台上的评论分布
|
||||
- 每个平台显示对应品牌颜色和图标
|
||||
- 显示具体数量和所占百分比
|
||||
- 条形长度直观反映各平台占比
|
||||
- 帮助团队了解哪些平台效果更好
|
||||
|
||||
# 审核状态分布
|
||||
|
||||
- 环形图展示内容审核状态的分布情况
|
||||
- 包括三种状态:已核准、待审核、已拒绝
|
||||
- 每个状态使用不同颜色直观区分
|
||||
- 显示各状态的数量和百分比
|
||||
- 提供内容管理流程的整体视图
|
||||
|
||||
# 情感分析详情
|
||||
|
||||
- 横向渐变条展示情感分析结果
|
||||
- 从红色(负面)到绿色(正面)的直观展示
|
||||
- 显示正面、中性、负面评论的准确百分比
|
||||
- 黑色指针标记在渐变条上的当前情感位置
|
||||
- 帮助评估品牌声誉和用户满意度
|
||||
|
||||
# 热门文章
|
||||
|
||||
- 列表形式展示最受欢迎的内容
|
||||
- 按留言数量或互动率排序
|
||||
- 显示文章标题和互动数据
|
||||
- 标记高互动内容以引起注意
|
||||
- 帮助识别最成功的内容类型
|
||||
|
||||
# 热门关键字
|
||||
|
||||
- 词云形式展示用户评论中出现频率最高的关键词
|
||||
- 根据词频调整关键词大小和颜色
|
||||
- 使用不同颜色区分不同类别的关键词
|
||||
- 视觉化展现用户关注点
|
||||
- 帮助团队了解用户讨论的主要话题
|
||||
|
||||
# 用户互动时间分析
|
||||
|
||||
- 24小时热力图展示一天中用户互动的高峰时段
|
||||
- 横轴显示24小时时间段,纵轴显示7天的日期
|
||||
- 颜色深浅表示互动量的多少
|
||||
- 自动标注互动高峰和低谷时段
|
||||
- 悬停时显示具体时间点的互动数据
|
||||
- 帮助KOL选择最佳内容发布时间
|
||||
- 可按平台筛选查看不同平台的用户活跃规律
|
||||
|
||||
# 内容表现分析
|
||||
|
||||
- 散点图形式展示所有内容的表现分布
|
||||
- 横轴表示内容覆盖量,纵轴表示互动率
|
||||
- 点的大小代表内容的总互动量
|
||||
- 点的颜色代表不同内容类型或平台
|
||||
- 四象限划分帮助识别高价值内容
|
||||
- 鼠标悬停显示详细指标和内容信息
|
||||
- 支持按时间段、内容类型和KOL筛选
|
||||
- 帮助团队发现最有效的内容模式
|
||||
@@ -8,7 +8,7 @@ const fs = require('fs');
|
||||
dotenv.config({ path: path.resolve(__dirname, '../../.env') });
|
||||
|
||||
// 定义输出目录
|
||||
const DB_REPORTS_DIR = '/Users/liam/code/promote/backend/db-reports';
|
||||
const DB_REPORTS_DIR = path.resolve(__dirname, '../db-reports');
|
||||
|
||||
// 获取ClickHouse配置
|
||||
const clickhouseHost = process.env.CLICKHOUSE_HOST || 'localhost';
|
||||
@@ -16,7 +16,7 @@ if (!databaseUrl) {
|
||||
}
|
||||
|
||||
// 定义输出目录
|
||||
const DB_REPORTS_DIR = '/Users/liam/code/promote/backend/db-reports';
|
||||
const DB_REPORTS_DIR = path.resolve(__dirname, '../db-reports');
|
||||
|
||||
// 连接数据库
|
||||
async function connect() {
|
||||
@@ -83,12 +83,16 @@ async function getTableSchema(client, tableName) {
|
||||
console.log(`\n获取表 ${tableName} 的结构...`);
|
||||
|
||||
try {
|
||||
const query = `
|
||||
// 获取基本列信息
|
||||
const columnsQuery = `
|
||||
SELECT
|
||||
column_name,
|
||||
data_type,
|
||||
is_nullable,
|
||||
column_default
|
||||
column_default,
|
||||
character_maximum_length,
|
||||
numeric_precision,
|
||||
numeric_scale
|
||||
FROM
|
||||
information_schema.columns
|
||||
WHERE
|
||||
@@ -98,19 +102,110 @@ async function getTableSchema(client, tableName) {
|
||||
ordinal_position;
|
||||
`;
|
||||
|
||||
const result = await client.query(query, [tableName]);
|
||||
const columnsResult = await client.query(columnsQuery, [tableName]);
|
||||
|
||||
if (!result.rows || result.rows.length === 0) {
|
||||
if (!columnsResult.rows || columnsResult.rows.length === 0) {
|
||||
console.log(`表 ${tableName} 不存在或没有列`);
|
||||
return null;
|
||||
}
|
||||
|
||||
// 获取主键信息
|
||||
const primaryKeyQuery = `
|
||||
SELECT
|
||||
kcu.column_name
|
||||
FROM
|
||||
information_schema.table_constraints tc
|
||||
JOIN information_schema.key_column_usage kcu
|
||||
ON tc.constraint_name = kcu.constraint_name
|
||||
AND tc.table_schema = kcu.table_schema
|
||||
WHERE
|
||||
tc.constraint_type = 'PRIMARY KEY' AND
|
||||
tc.table_schema = 'public' AND
|
||||
tc.table_name = $1
|
||||
ORDER BY
|
||||
kcu.ordinal_position;
|
||||
`;
|
||||
|
||||
const primaryKeyResult = await client.query(primaryKeyQuery, [tableName]);
|
||||
|
||||
// 获取外键信息
|
||||
const foreignKeysQuery = `
|
||||
SELECT
|
||||
kcu.column_name,
|
||||
ccu.table_name AS foreign_table_name,
|
||||
ccu.column_name AS foreign_column_name
|
||||
FROM
|
||||
information_schema.table_constraints tc
|
||||
JOIN information_schema.key_column_usage kcu
|
||||
ON tc.constraint_name = kcu.constraint_name
|
||||
AND tc.table_schema = kcu.table_schema
|
||||
JOIN information_schema.constraint_column_usage ccu
|
||||
ON tc.constraint_name = ccu.constraint_name
|
||||
AND tc.table_schema = ccu.table_schema
|
||||
WHERE
|
||||
tc.constraint_type = 'FOREIGN KEY' AND
|
||||
tc.table_schema = 'public' AND
|
||||
tc.table_name = $1;
|
||||
`;
|
||||
|
||||
const foreignKeysResult = await client.query(foreignKeysQuery, [tableName]);
|
||||
|
||||
// 获取索引信息
|
||||
const indexesQuery = `
|
||||
SELECT
|
||||
indexname,
|
||||
indexdef
|
||||
FROM
|
||||
pg_indexes
|
||||
WHERE
|
||||
schemaname = 'public' AND
|
||||
tablename = $1;
|
||||
`;
|
||||
|
||||
const indexesResult = await client.query(indexesQuery, [tableName]);
|
||||
|
||||
// 输出列信息
|
||||
console.log(`表 ${tableName} 的列:`);
|
||||
result.rows.forEach(column => {
|
||||
console.log(` - ${column.column_name} (${column.data_type}, ${column.is_nullable === 'YES' ? '可为空' : '不可为空'}, 默认值: ${column.column_default || 'NULL'})`);
|
||||
columnsResult.rows.forEach(column => {
|
||||
console.log(` - ${column.column_name} (${column.data_type}${
|
||||
column.character_maximum_length ? `(${column.character_maximum_length})` :
|
||||
(column.numeric_precision ? `(${column.numeric_precision},${column.numeric_scale})` : '')
|
||||
}, ${column.is_nullable === 'YES' ? '可为空' : '不可为空'}, 默认值: ${column.column_default || 'NULL'})`);
|
||||
});
|
||||
|
||||
return result.rows;
|
||||
// 输出主键信息
|
||||
if (primaryKeyResult.rows.length > 0) {
|
||||
console.log(` 主键: ${primaryKeyResult.rows.map(row => row.column_name).join(', ')}`);
|
||||
} else {
|
||||
console.log(' 主键: 无');
|
||||
}
|
||||
|
||||
// 输出外键信息
|
||||
if (foreignKeysResult.rows.length > 0) {
|
||||
console.log(' 外键:');
|
||||
foreignKeysResult.rows.forEach(fk => {
|
||||
console.log(` - ${fk.column_name} -> ${fk.foreign_table_name}.${fk.foreign_column_name}`);
|
||||
});
|
||||
} else {
|
||||
console.log(' 外键: 无');
|
||||
}
|
||||
|
||||
// 输出索引信息
|
||||
if (indexesResult.rows.length > 0) {
|
||||
console.log(' 索引:');
|
||||
indexesResult.rows.forEach(idx => {
|
||||
console.log(` - ${idx.indexname}: ${idx.indexdef}`);
|
||||
});
|
||||
} else {
|
||||
console.log(' 索引: 无');
|
||||
}
|
||||
|
||||
return {
|
||||
columns: columnsResult.rows,
|
||||
primaryKey: primaryKeyResult.rows,
|
||||
foreignKeys: foreignKeysResult.rows,
|
||||
indexes: indexesResult.rows
|
||||
};
|
||||
} catch (error) {
|
||||
console.error(`获取表 ${tableName} 结构时出错:`, error);
|
||||
return null;
|
||||
@@ -180,17 +275,11 @@ async function main() {
|
||||
console.log('\n所有PostgreSQL表:');
|
||||
console.log(tables.join(', '));
|
||||
|
||||
// 获取特定表的结构,但不获取数据示例
|
||||
const requiredTables = ['projects', 'influencers', 'project_influencers', 'posts'];
|
||||
|
||||
for (const tableName of requiredTables) {
|
||||
if (tables.includes(tableName)) {
|
||||
await getTableSchema(client, tableName);
|
||||
// 移除数据示例检查
|
||||
// await getTableDataSample(client, tableName);
|
||||
} else {
|
||||
console.log(`\n表 ${tableName} 不存在`);
|
||||
}
|
||||
// 获取所有表的结构,而不只是特定表
|
||||
for (const tableName of tables) {
|
||||
await getTableSchema(client, tableName);
|
||||
// 移除数据示例检查
|
||||
// await getTableDataSample(client, tableName);
|
||||
}
|
||||
|
||||
console.log('\n数据库结构检查完成');
|
||||
@@ -8,7 +8,7 @@ const postgresScriptPath = path.join(__dirname, 'postgres-schema.js');
|
||||
const clickhouseScriptPath = path.join(__dirname, 'clickhouse-schema.js');
|
||||
|
||||
// 定义输出目录
|
||||
const DB_REPORTS_DIR = '/Users/liam/code/promote/backend/db-reports';
|
||||
const DB_REPORTS_DIR = path.resolve(__dirname, '../db-reports');
|
||||
|
||||
// 确保目录存在
|
||||
if (!fs.existsSync(DB_REPORTS_DIR)) {
|
||||
98
backend/db/sql/clickhouse/ch-query.sh
Executable file
98
backend/db/sql/clickhouse/ch-query.sh
Executable file
@@ -0,0 +1,98 @@
|
||||
#!/bin/bash
|
||||
# 文件名: ch-query.sh
|
||||
# 用途: 执行ClickHouse SQL查询的便捷脚本
|
||||
|
||||
# 连接参数
|
||||
# Connection parameters. Each value can be overridden from the environment
# (e.g. CH_PASSWORD=... ./ch-query.sh -t); the defaults match the previous
# hard-coded values so existing invocations keep working.
CH_HOST="${CH_HOST:-localhost}"
CH_PORT="${CH_PORT:-9000}"
CH_USER="${CH_USER:-admin}"
CH_PASSWORD="${CH_PASSWORD:-your_secure_password}"
CH_DATABASE="${CH_DATABASE:-promote}"
|
||||
|
||||
# 基本查询函数
|
||||
# Run a single SQL string through the ClickHouse command-line client.
#   $1 - the SQL text to execute
# Every expansion is quoted so that values containing spaces or glob
# characters (most likely the password) reach the client as one argument.
function ch_query() {
    clickhouse client \
        --host "$CH_HOST" \
        --port "$CH_PORT" \
        --user "$CH_USER" \
        --password "$CH_PASSWORD" \
        --database "$CH_DATABASE" \
        -q "$1"
}
|
||||
|
||||
# 显示帮助信息
|
||||
# Print the usage text (options and examples) to stdout.
# The heredoc delimiter is unquoted on purpose so that $0 expands to the
# script name.
function show_help() {
    cat <<EOF
ClickHouse 查询工具
用法: $0 [选项] [SQL查询]

选项:
 -t 显示所有表
 -d <表名> 显示表结构
 -s <表名> 显示表样本数据(前10行)
 -c <表名> 计算表中的记录数
 -h, --help 显示此帮助信息
 -q "SQL查询" 执行自定义SQL查询
 -f <文件名> 执行SQL文件

示例:
 $0 -t # 显示所有表
 $0 -d events # 显示events表结构
 $0 -q "SELECT * FROM events LIMIT 5" # 执行自定义查询
EOF
}
|
||||
|
||||
# 没有参数时显示帮助
|
||||
# Abort with the given message when a required option argument is empty.
#   $1 - the argument value to check
#   $2 - the error message to print before exiting with status 1
function require_arg() {
    if [ -z "$1" ]; then
        echo "$2"
        exit 1
    fi
}

# With no arguments at all, print the help text and stop successfully.
if (( $# == 0 )); then
    show_help
    exit 0
fi

# Dispatch on the first argument; "$2" carries the table name, SQL text or
# file name, depending on the option.
case "$1" in
    -t)
        ch_query "SHOW TABLES"
        ;;
    -d)
        require_arg "$2" "错误: 需要提供表名"
        ch_query "DESCRIBE TABLE $2"
        ;;
    -s)
        require_arg "$2" "错误: 需要提供表名"
        ch_query "SELECT * FROM $2 LIMIT 10"
        ;;
    -c)
        require_arg "$2" "错误: 需要提供表名"
        ch_query "SELECT COUNT(*) FROM $2"
        ;;
    -q)
        require_arg "$2" "错误: 需要提供SQL查询"
        ch_query "$2"
        ;;
    -f)
        require_arg "$2" "错误: 需要提供SQL文件"
        if [ ! -f "$2" ]; then
            echo "错误: 文件 '$2' 不存在"
            exit 1
        fi
        # The whole file content is passed to the client as one -q argument.
        ch_query "$(cat "$2")"
        ;;
    -h|--help)
        show_help
        ;;
    *)
        echo "未知选项: $1"
        show_help
        exit 1
        ;;
esac

exit 0
|
||||
4
backend/db/sql/clickhouse/clickhouse.md
Normal file
4
backend/db/sql/clickhouse/clickhouse.md
Normal file
@@ -0,0 +1,4 @@
|
||||
```bash
|
||||
alias clickhouse-sql='clickhouse client --host localhost --port 9000 --user admin --password your_secure_password --database promote -q'
|
||||
clickhouse-sql "SHOW TABLES"
|
||||
```
|
||||
251
backend/db/sql/clickhouse/create_events.sql
Normal file
251
backend/db/sql/clickhouse/create_events.sql
Normal file
@@ -0,0 +1,251 @@
|
||||
-- Drop the old objects so the script can be re-run from scratch.
-- Every materialized view created further down in this script is listed here;
-- previously mv_content_status, mv_popular_content and mv_keywords were
-- missing, so a second run failed on their CREATE statements.
-- The views are dropped before the events table they read from.
DROP TABLE IF EXISTS mv_kol_performance;

DROP TABLE IF EXISTS mv_platform_distribution;

DROP TABLE IF EXISTS mv_sentiment_analysis;

DROP TABLE IF EXISTS mv_interaction_time;

DROP TABLE IF EXISTS mv_content_status;

DROP TABLE IF EXISTS mv_conversion_funnel;

DROP TABLE IF EXISTS mv_popular_content;

DROP TABLE IF EXISTS mv_keywords;

DROP TABLE IF EXISTS events;
|
||||
|
||||
-- Create the new events table: one row per tracked event.
CREATE TABLE events (
    -- Basic information
    event_id UUID DEFAULT generateUUIDv4(),
    timestamp DateTime DEFAULT now(),
    -- date/hour are derived from the row's own timestamp (not from now()) so
    -- that events inserted with an explicit, older timestamp are bucketed on
    -- the day and hour they actually happened. When timestamp is defaulted,
    -- the result is the same as before.
    date Date DEFAULT toDate(timestamp),
    hour UInt8 DEFAULT toHour(timestamp),

    -- Entity references
    user_id String,
    influencer_id String,
    content_id String,
    project_id String,

    -- Event information
    event_type Enum8(
        'view' = 1,        -- view
        'like' = 2,        -- like
        'unlike' = 3,      -- like removed
        'follow' = 4,      -- follow
        'unfollow' = 5,    -- unfollow
        'comment' = 6,     -- comment
        'share' = 7,       -- share
        'click' = 8,       -- link click
        'impression' = 9,  -- impression
        'purchase' = 10,   -- purchase
        'signup' = 11      -- sign-up
    ),

    -- Conversion funnel
    funnel_stage Enum8(
        'exposure' = 1,       -- exposure
        'interest' = 2,       -- interest
        'consideration' = 3,  -- consideration
        'intent' = 4,         -- intent
        'evaluation' = 5,     -- evaluation
        'purchase' = 6        -- purchase
    ),

    -- Content information
    platform String,  -- social platform
    content_type Enum8(
        'video' = 1,
        'image' = 2,
        'text' = 3,
        'story' = 4,
        'reel' = 5,
        'live' = 6
    ),
    -- Moderation status
    content_status Enum8(
        'approved' = 1,
        'pending' = 2,
        'rejected' = 3
    ),

    -- Interaction analysis
    -- Sentiment classification
    sentiment Enum8(
        'positive' = 1,
        'neutral' = 2,
        'negative' = 3
    ),
    comment_text String,     -- comment text
    keywords Array(String),  -- keywords

    -- Numeric metrics
    interaction_value Float64,  -- interaction value
    followers_count UInt32,     -- follower count
    followers_change Int32,     -- follower delta
    likes_count UInt32,         -- like count
    likes_change Int32,         -- like delta
    views_count UInt32,         -- view count

    -- Device information
    ip String,
    user_agent String,
    device_type String,
    referrer String,

    -- Geo information
    geo_country String,
    geo_city String,

    -- Session information
    session_id String
) ENGINE = MergeTree()
PARTITION BY toYYYYMM(timestamp)
ORDER BY (event_type, influencer_id, date, hour)
SETTINGS index_granularity = 8192;
|
||||
|
||||
-- Materialized view: KOL performance overview (per influencer, per day).
CREATE MATERIALIZED VIEW mv_kol_performance
ENGINE = SummingMergeTree()
PARTITION BY toYYYYMM(date)
ORDER BY (influencer_id, date)
AS
SELECT
    influencer_id,
    date,
    countIf(event_type = 'follow') - countIf(event_type = 'unfollow') AS new_followers,
    countIf(event_type = 'like') - countIf(event_type = 'unlike') AS new_likes,
    countIf(event_type = 'view') AS views,
    countIf(event_type = 'comment') AS comments,
    countIf(event_type = 'share') AS shares
FROM events
GROUP BY
    influencer_id,
    date;

-- Materialized view: platform distribution (per platform, per day).
-- NOTE(review): unique_users / unique_contents are uniqExact results stored in
-- a SummingMergeTree; confirm that summing them across merged rows is intended.
CREATE MATERIALIZED VIEW mv_platform_distribution
ENGINE = SummingMergeTree()
PARTITION BY toYYYYMM(date)
ORDER BY (platform, date)
AS
SELECT
    platform,
    date,
    count() AS events_count,
    uniqExact(user_id) AS unique_users,
    uniqExact(content_id) AS unique_contents
FROM events
GROUP BY
    platform,
    date;

-- Materialized view: sentiment analysis (comment events only).
CREATE MATERIALIZED VIEW mv_sentiment_analysis
ENGINE = SummingMergeTree()
PARTITION BY toYYYYMM(date)
ORDER BY (sentiment, date)
AS
SELECT
    sentiment,
    date,
    count() AS count
FROM events
WHERE
    sentiment IS NOT NULL
    AND event_type = 'comment'
GROUP BY
    sentiment,
    date;

-- Materialized view: user interaction time (per day, per hour).
-- NOTE(review): unique_users is a uniqExact result stored in a
-- SummingMergeTree; confirm that summing it across merged rows is intended.
CREATE MATERIALIZED VIEW mv_interaction_time
ENGINE = SummingMergeTree()
PARTITION BY toYYYYMM(date)
ORDER BY (date, hour)
AS
SELECT
    date,
    hour,
    count() AS events_count,
    uniqExact(user_id) AS unique_users
FROM events
GROUP BY
    date,
    hour;

-- Materialized view: content moderation status.
CREATE MATERIALIZED VIEW mv_content_status
ENGINE = SummingMergeTree()
PARTITION BY toYYYYMM(date)
ORDER BY (content_status, date)
AS
SELECT
    content_status,
    date,
    count() AS count
FROM events
WHERE
    content_status IS NOT NULL
GROUP BY
    content_status,
    date;

-- Materialized view: conversion funnel (per stage, per day).
-- NOTE(review): unique_users is a uniqExact result stored in a
-- SummingMergeTree; confirm that summing it across merged rows is intended.
CREATE MATERIALIZED VIEW mv_conversion_funnel
ENGINE = SummingMergeTree()
PARTITION BY toYYYYMM(date)
ORDER BY (funnel_stage, date)
AS
SELECT
    funnel_stage,
    date,
    count() AS stage_count,
    uniqExact(user_id) AS unique_users
FROM events
WHERE
    funnel_stage IS NOT NULL
GROUP BY
    funnel_stage,
    date;

-- Materialized view: popular content (per content item, per day).
CREATE MATERIALIZED VIEW mv_popular_content
ENGINE = SummingMergeTree()
PARTITION BY toYYYYMM(date)
ORDER BY (content_id, date)
AS
SELECT
    content_id,
    influencer_id,
    date,
    countIf(event_type = 'view') AS views,
    countIf(event_type = 'like') AS likes,
    countIf(event_type = 'comment') AS comments,
    countIf(event_type = 'share') AS shares
FROM events
GROUP BY
    content_id,
    influencer_id,
    date;

-- Materialized view: keyword analysis (one row per keyword, per day).
CREATE MATERIALIZED VIEW mv_keywords
ENGINE = SummingMergeTree()
PARTITION BY toYYYYMM(date)
ORDER BY (keyword, date)
AS
SELECT
    arrayJoin(keywords) AS keyword,
    date,
    count() AS frequency
FROM events
WHERE
    length(keywords) > 0
GROUP BY
    keyword,
    date;
|
||||
331
backend/db/sql/postgres/pg-query.js
Executable file
331
backend/db/sql/postgres/pg-query.js
Executable file
@@ -0,0 +1,331 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
// # 显示所有表
|
||||
// ./pg-query.js -t
|
||||
|
||||
// # 显示表结构
|
||||
// ./pg-query.js -d influencers
|
||||
|
||||
// # 显示样本数据,限制5行
|
||||
// ./pg-query.js -s posts -l 5
|
||||
|
||||
// # 查看表记录数
|
||||
// ./pg-query.js -c posts
|
||||
|
||||
// # 显示索引
|
||||
// ./pg-query.js -i posts
|
||||
|
||||
// # 显示外键
|
||||
// ./pg-query.js -f posts
|
||||
|
||||
// # 显示引用
|
||||
// ./pg-query.js -r influencers
|
||||
|
||||
// # 执行自定义查询
|
||||
// ./pg-query.js -q "SELECT * FROM influencers WHERE platform = 'Instagram' LIMIT 5"
|
||||
|
||||
// # 执行SQL文件
|
||||
// ./pg-query.js -e schema.sql
|
||||
|
||||
const { Client } = require('pg');
|
||||
const path = require('path');
|
||||
const fs = require('fs');
|
||||
const yargs = require('yargs/yargs');
|
||||
const { hideBin } = require('yargs/helpers');
|
||||
|
||||
// 加载.env文件 - 使用正确的相对路径
|
||||
require('dotenv').config({ path: path.resolve(__dirname, '../../../.env') });
|
||||
|
||||
// 显示连接信息(不含密码)以便调试
|
||||
/**
 * Read the PostgreSQL connection string from the DATABASE_URL environment
 * variable and log a copy of it with the password masked.
 *
 * Exits the process with status 1 when DATABASE_URL is not set.
 *
 * @returns {string} The unmodified DATABASE_URL value.
 */
function getConnectionString() {
  const databaseUrl = process.env.DATABASE_URL;

  if (!databaseUrl) {
    console.error('错误: 未找到DATABASE_URL环境变量');
    process.exit(1);
  }

  // Mask the password through URL parsing rather than a regex: the old
  // pattern leaked the leading part of a password containing ':' and rewrote
  // URLs that carry no password at all.
  let sanitizedUrl;
  try {
    const parsed = new URL(databaseUrl);
    if (parsed.password) {
      parsed.password = '***';
    }
    sanitizedUrl = parsed.toString();
  } catch (parseError) {
    // Not a parseable URL: fall back to masking everything between the first
    // ':' after the "//" user part and the last '@'.
    sanitizedUrl = databaseUrl.replace(/^([^:]*:\/\/[^:@/]*):.*@/, '$1:***@');
  }
  console.log(`使用连接: ${sanitizedUrl}`);

  return databaseUrl;
}
|
||||
|
||||
// 创建一个新的客户端
|
||||
/**
 * Open a fresh connection, run one query, and close the connection again.
 *
 * @param {string} query - SQL text to execute.
 * @param {Array} [params=[]] - Values for the query's positional placeholders.
 * @returns {Promise<Array|null>} The result rows, or null when connecting or
 *   querying failed (the error message is logged).
 */
async function runQuery(query, params = []) {
  const connectionString = getConnectionString();
  const db = new Client({ connectionString });

  try {
    await db.connect();
    console.log('数据库连接成功');
    const { rows } = await db.query(query, params);
    return rows;
  } catch (queryError) {
    console.error('查询执行错误:', queryError.message);
    return null;
  } finally {
    // Always release the connection, whether the query succeeded or not.
    await db.end();
  }
}
|
||||
|
||||
// 显示所有表
|
||||
/**
 * Print the names of all tables in the `public` schema.
 *
 * @returns {Promise<void>}
 */
async function showTables() {
  const tables = await runQuery(
    "SELECT table_name FROM information_schema.tables WHERE table_schema = 'public' ORDER BY table_name;"
  );

  if (!tables || tables.length === 0) {
    console.log('没有找到表或连接失败');
    return;
  }

  console.log('数据库中的表:');
  console.table(tables);
}
|
||||
|
||||
// 显示表结构
|
||||
/**
 * Print the column definitions of one table in the `public` schema.
 *
 * @param {string} tableName - Table to describe.
 * @returns {Promise<void>}
 */
async function showTableStructure(tableName) {
  const structureSql = `
    SELECT
      column_name AS "列名",
      data_type AS "数据类型",
      CASE WHEN is_nullable = 'YES' THEN '允许为空' ELSE '不允许为空' END AS "是否可空",
      column_default AS "默认值",
      character_maximum_length AS "最大长度"
    FROM
      information_schema.columns
    WHERE
      table_schema = 'public' AND
      table_name = $1
    ORDER BY
      ordinal_position;
  `;

  const columns = await runQuery(structureSql, [tableName]);

  if (!columns || columns.length === 0) {
    console.log(`表 ${tableName} 不存在或连接失败`);
    return;
  }

  console.log(`表 ${tableName} 的结构:`);
  console.table(columns);
}
|
||||
|
||||
// 显示样本数据
|
||||
/**
 * Print the first rows of a table.
 *
 * The table name is embedded as a quoted identifier with any embedded double
 * quote doubled, and the row limit is sent as a bound parameter, so neither
 * value can change the shape of the statement.
 *
 * @param {string} tableName - Table to sample.
 * @param {number} [limit=10] - Maximum number of rows; must be a non-negative integer.
 * @returns {Promise<void>}
 */
async function showSampleData(tableName, limit = 10) {
  // A non-numeric "-l" value arrives as NaN; reject it here rather than
  // sending "LIMIT NaN" to the server.
  if (!Number.isInteger(limit) || limit < 0) {
    console.error(`错误: 无效的行数限制: ${limit}`);
    return;
  }

  const quotedTable = `"${String(tableName).replaceAll('"', '""')}"`;
  const query = `SELECT * FROM ${quotedTable} LIMIT $1;`;
  const data = await runQuery(query, [limit]);

  if (data && data.length > 0) {
    console.log(`表 ${tableName} 的样本数据 (${limit} 行):`);
    console.table(data);
  } else {
    console.log(`表 ${tableName} 为空或不存在`);
  }
}
|
||||
|
||||
// 显示记录计数
|
||||
/**
 * Print the number of rows in a table.
 *
 * The table name is embedded as a quoted identifier with any embedded double
 * quote doubled, so it cannot terminate the identifier early.
 *
 * @param {string} tableName - Table to count.
 * @returns {Promise<void>}
 */
async function showRecordCount(tableName) {
  const quotedTable = `"${String(tableName).replaceAll('"', '""')}"`;
  const query = `SELECT COUNT(*) AS "记录数" FROM ${quotedTable};`;
  const count = await runQuery(query);

  if (count) {
    console.log(`表 ${tableName} 的记录数:`);
    console.table(count);
  } else {
    console.log(`表 ${tableName} 不存在或连接失败`);
  }
}
|
||||
|
||||
// 显示索引信息
|
||||
/**
 * Print the indexes defined on a table, as listed in pg_indexes.
 *
 * @param {string} tableName - Table whose indexes are listed.
 * @returns {Promise<void>}
 */
async function showIndexes(tableName) {
  const indexSql = `
    SELECT
      indexname AS "索引名称",
      indexdef AS "索引定义"
    FROM
      pg_indexes
    WHERE
      tablename = $1
    ORDER BY
      indexname;
  `;

  const indexes = await runQuery(indexSql, [tableName]);

  if (!indexes || indexes.length === 0) {
    console.log(`表 ${tableName} 没有索引或不存在`);
    return;
  }

  console.log(`表 ${tableName} 的索引:`);
  console.table(indexes);
}
|
||||
|
||||
// 显示外键
|
||||
/**
 * Print the foreign-key constraints declared on a table.
 *
 * @param {string} tableName - Table whose outgoing foreign keys are listed.
 * @returns {Promise<void>}
 */
async function showForeignKeys(tableName) {
  const foreignKeySql = `
    SELECT
      conname AS "外键名称",
      pg_get_constraintdef(oid) AS "外键定义"
    FROM
      pg_constraint
    WHERE
      conrelid = $1::regclass AND contype = 'f';
  `;

  const foreignKeys = await runQuery(foreignKeySql, [tableName]);

  if (!foreignKeys || foreignKeys.length === 0) {
    console.log(`表 ${tableName} 没有外键或不存在`);
    return;
  }

  console.log(`表 ${tableName} 的外键:`);
  console.table(foreignKeys);
}
|
||||
|
||||
// 显示引用当前表的外键
|
||||
/**
 * Print the foreign-key constraints on other tables that point at this table.
 *
 * @param {string} tableName - Table being referenced.
 * @returns {Promise<void>}
 */
async function showReferencingKeys(tableName) {
  const referencingSql = `
    SELECT
      c.conname AS "外键名称",
      t.relname AS "引用表",
      pg_get_constraintdef(c.oid) AS "外键定义"
    FROM
      pg_constraint c
    JOIN
      pg_class t ON c.conrelid = t.oid
    WHERE
      c.confrelid = $1::regclass AND c.contype = 'f';
  `;

  const referencingKeys = await runQuery(referencingSql, [tableName]);

  if (!referencingKeys || referencingKeys.length === 0) {
    console.log(`没有找到引用表 ${tableName} 的外键关系`);
    return;
  }

  console.log(`引用表 ${tableName} 的外键关系:`);
  console.table(referencingKeys);
}
|
||||
|
||||
// 执行自定义查询
|
||||
/**
 * Run an arbitrary SQL string and print whatever rows come back.
 *
 * @param {string} query - SQL text to execute.
 * @returns {Promise<void>}
 */
async function executeQuery(query) {
  const rows = await runQuery(query);

  if (!rows) {
    console.log('查询执行失败或无结果');
    return;
  }

  console.log('查询结果:');
  console.table(rows);
}
|
||||
|
||||
// 执行SQL文件
|
||||
/**
 * Read a SQL file from disk and execute its content as one query.
 * Any failure is logged rather than rethrown.
 *
 * @param {string} filename - Path of the SQL file.
 * @returns {Promise<void>}
 */
async function executeSqlFile(filename) {
  try {
    const sqlText = fs.readFileSync(filename, 'utf8');
    console.log(`执行SQL文件: ${filename}`);
    await executeQuery(sqlText);
  } catch (fileError) {
    console.error(`执行SQL文件失败: ${fileError.message}`);
  }
}
|
||||
|
||||
// 主函数
|
||||
/**
 * Entry point: parse the command line and run the first matching action.
 *
 * Actions are checked in a fixed order (tables, describe, sample, count,
 * indexes, foreign-keys, references, query, execute-file); when none is
 * given, the usage text is printed.
 *
 * @returns {Promise<void>}
 */
async function main() {
  try {
    // Keep the configured parser so the fallback help lists every option;
    // previously a fresh, unconfigured yargs instance printed the help.
    const parser = yargs(hideBin(process.argv))
      .usage('PostgreSQL 查询工具\n\n用法: $0 [选项]')
      .option('t', { alias: 'tables', describe: '显示所有表', type: 'boolean' })
      .option('d', { alias: 'describe', describe: '显示表结构', type: 'string' })
      .option('s', { alias: 'sample', describe: '显示表样本数据', type: 'string' })
      .option('l', { alias: 'limit', describe: '样本数据行数限制', type: 'number', default: 10 })
      .option('c', { alias: 'count', describe: '计算表中的记录数', type: 'string' })
      .option('i', { alias: 'indexes', describe: '显示表索引', type: 'string' })
      .option('f', { alias: 'foreign-keys', describe: '显示表外键关系', type: 'string' })
      .option('r', { alias: 'references', describe: '显示引用此表的外键', type: 'string' })
      .option('q', { alias: 'query', describe: '执行自定义SQL查询', type: 'string' })
      .option('e', { alias: 'execute-file', describe: '执行SQL文件', type: 'string' })
      .example('$0 -t', '显示所有表')
      .example('$0 -d influencers', '显示influencers表结构')
      .example('$0 -s posts -l 5', '显示posts表前5行数据')
      .epilog('更多信息请访问项目文档')
      .help()
      .alias('h', 'help');

    const argv = parser.argv;

    // [option value, handler] pairs in priority order; the first truthy
    // option value decides which action runs.
    const actions = [
      [argv.tables, () => showTables()],
      [argv.describe, () => showTableStructure(argv.describe)],
      [argv.sample, () => showSampleData(argv.sample, argv.limit)],
      [argv.count, () => showRecordCount(argv.count)],
      [argv.indexes, () => showIndexes(argv.indexes)],
      [argv.foreignKeys, () => showForeignKeys(argv.foreignKeys)],
      [argv.references, () => showReferencingKeys(argv.references)],
      [argv.query, () => executeQuery(argv.query)],
      [argv.executeFile, () => executeSqlFile(argv.executeFile)],
    ];

    const selected = actions.find(([value]) => value);
    if (selected) {
      await selected[1]();
    } else {
      parser.showHelp();
    }
  } catch (err) {
    console.error('程序执行错误:', err.message);
    process.exit(1);
  }
}

// Run the entry point; anything that escapes main() is logged and turns into
// a non-zero exit status.
main().catch(err => {
  console.error('程序执行错误:', err);
  process.exit(1);
});
|
||||
@@ -9,7 +9,10 @@
|
||||
"start": "node dist/index.js",
|
||||
"lint": "eslint src --ext .ts",
|
||||
"test": "vitest run",
|
||||
"test:analytics": "tsx scripts/analytics-test.ts"
|
||||
"test:analytics": "tsx scripts/analytics-test.ts",
|
||||
"ch": "bash db/sql/clickhouse/ch-query.sh",
|
||||
"check-schema": "node db/db-inspector/run-all.js",
|
||||
"pg": "node db/sql/postgres/pg-query.js"
|
||||
},
|
||||
"keywords": [],
|
||||
"author": "",
|
||||
@@ -24,8 +27,10 @@
|
||||
"dotenv": "^16.4.7",
|
||||
"hono": "^4.7.4",
|
||||
"jsonwebtoken": "^9.0.2",
|
||||
"pg": "^8.14.0",
|
||||
"redis": "^4.7.0",
|
||||
"uuid": "^11.1.0"
|
||||
"uuid": "^11.1.0",
|
||||
"yargs": "^17.7.2"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@clickhouse/client": "^1.10.1",
|
||||
|
||||
@@ -1,47 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# 一键运行所有数据库检查脚本
|
||||
echo "============================================="
|
||||
echo "开始运行所有数据库结构检查脚本..."
|
||||
echo "============================================="
|
||||
|
||||
# 定义输出目录
|
||||
DB_REPORTS_DIR="/Users/liam/code/promote/backend/db-reports"
|
||||
|
||||
# 确保目录存在
|
||||
mkdir -p "$DB_REPORTS_DIR"
|
||||
echo "输出目录: $DB_REPORTS_DIR"
|
||||
|
||||
# 获取当前时间戳
|
||||
TIMESTAMP=$(date +"%Y-%m-%dT%H-%M-%S")
|
||||
echo "时间戳: $TIMESTAMP"
|
||||
echo ""
|
||||
|
||||
# 运行PostgreSQL脚本
|
||||
echo "============================================="
|
||||
echo "正在运行PostgreSQL数据库结构检查脚本..."
|
||||
echo "============================================="
|
||||
NODE_OPTIONS="--no-deprecation --no-warnings" node --no-inspect backend/scripts/db-inspector/postgres-schema.js
|
||||
|
||||
# 等待1秒
|
||||
sleep 1
|
||||
|
||||
# 运行ClickHouse脚本
|
||||
echo ""
|
||||
echo "============================================="
|
||||
echo "正在运行ClickHouse数据库结构检查脚本..."
|
||||
echo "============================================="
|
||||
NODE_OPTIONS="--no-deprecation --no-warnings" node --no-inspect backend/scripts/db-inspector/clickhouse-schema.js
|
||||
|
||||
# 完成信息
|
||||
echo ""
|
||||
echo "============================================="
|
||||
echo "所有数据库结构检查脚本已完成!"
|
||||
echo "报告已保存到以下目录:"
|
||||
echo "$DB_REPORTS_DIR"
|
||||
echo "============================================="
|
||||
|
||||
# 列出生成的报告文件
|
||||
echo ""
|
||||
echo "生成的报告文件:"
|
||||
ls -la "$DB_REPORTS_DIR"
|
||||
100
discuss.md
Normal file
100
discuss.md
Normal file
@@ -0,0 +1,100 @@
|
||||
<think>The user wants me to discuss whether the current database design is suitable based on the Requirements.md document and the database schema reports. They specifically mentioned that they believe that in the ClickHouse database, only the events table is necessary and the other tables could be removed.
|
||||
|
||||
Let me analyze both the requirements and the database schema:
|
||||
|
||||
1. From Requirements.md:
|
||||
- We need to track KOL (Key Opinion Leader) performance metrics
|
||||
- Multiple visualization components are required including overview statistics, conversion funnels, post performance, etc.
|
||||
- Data requirements include metrics like follower growth, new likes, new follows, interaction rates, etc.
|
||||
- Need to analyze sentiment, popular content, platforms distribution, etc.
|
||||
|
||||
2. From the database reports:
|
||||
- PostgreSQL database has tables for projects, influencers, project_influencers, posts, comments, etc.
|
||||
- ClickHouse database has tables: events, follower_events, like_events, view_events
|
||||
|
||||
Let me analyze if we could simplify the ClickHouse schema by keeping only the events table:
|
||||
|
||||
The current ClickHouse design has:
|
||||
- events (general events table)
|
||||
- follower_events (specific for follow/unfollow actions)
|
||||
- like_events (specific for like/unlike actions)
|
||||
- view_events (specific for content views)
|
||||
|
||||
The user's suggestion is to potentially consolidate all these into just the general events table. Let me evaluate if this makes sense based on the requirements and typical ClickHouse usage patterns.</think>

# 数据库设计评估
|
||||
|
||||
根据您提供的需求文档和数据库结构报告,我来分析当前数据库设计是否合适,特别是关于ClickHouse中的表设计。
|
||||
|
||||
## 现有数据库结构
|
||||
|
||||
### PostgreSQL (关系型数据)
|
||||
- `projects`: 项目基本信息
|
||||
- `influencers`: KOL基本信息和统计数据
|
||||
- `project_influencers`: 项目和KOL的关联关系
|
||||
- `posts`: KOL发布的内容信息
|
||||
- `comments`, `project_comments`, `user_profiles`: 其他辅助数据
|
||||
|
||||
### ClickHouse (分析型数据)
|
||||
- `events`: 通用事件表
|
||||
- `follower_events`: 关注/取消关注事件
|
||||
- `like_events`: 点赞/取消点赞事件
|
||||
- `view_events`: 内容浏览事件
|
||||
|
||||
## 关于您的建议
|
||||
|
||||
您认为ClickHouse中除了events表,其他表可以不需要。这个建议有合理性,分析如下:
|
||||
|
||||
### 支持合并到一张表的理由
|
||||
|
||||
1. **简化架构**: 单一事件表更容易维护和理解。
|
||||
|
||||
2. **查询灵活性**: 通过在`events`表中添加适当的`event_type`和必要字段,可以覆盖所有现有特定表的功能。
|
||||
|
||||
3. **聚合效率**: ClickHouse在处理大表聚合时非常高效,不会因为表的合并而显著影响性能。
|
||||
|
||||
4. **满足现有需求**: 从需求文档看,主要需要的是事件计数和趋势分析,这些可以通过单一事件表实现。
|
||||
|
||||
### 修改建议
|
||||
|
||||
如果合并为单一`events`表,可以进行如下设计:
|
||||
|
||||
```sql
|
||||
CREATE TABLE events (
|
||||
event_id UUID DEFAULT generateUUIDv4(),
|
||||
user_id String,
|
||||
content_id String, -- 可以是post_id或其他内容ID
|
||||
related_id String, -- 例如关注事件中的followed_id
|
||||
event_type Enum8(
|
||||
'view' = 1,
|
||||
'like' = 2,
|
||||
'unlike' = 3,
|
||||
'follow' = 4,
|
||||
'unfollow' = 5,
|
||||
'comment' = 6,
|
||||
'share' = 7
|
||||
-- 可扩展更多类型
|
||||
),
|
||||
value Float64, -- 通用数值字段
|
||||
ip String, -- 适用于某些事件类型
|
||||
user_agent String, -- 适用于某些事件类型
|
||||
timestamp DateTime DEFAULT now()
|
||||
)
|
||||
ENGINE = MergeTree()
|
||||
PARTITION BY toYYYYMM(timestamp)
|
||||
ORDER BY (event_type, timestamp);
|
||||
```
|
||||
|
||||
### 潜在考虑事项
|
||||
|
||||
1. **查询性能**: 对于特定类型的查询,专用表可能性能更佳,但这取决于您的查询模式。
|
||||
|
||||
2. **存储效率**: 通用表会有一些字段对特定事件类型是不必要的,可能导致存储空间浪费,但在大多数情况下影响不大。
|
||||
|
||||
3. **数据治理**: 单一事件表可能使数据结构不够清晰,需要良好的文档和模式约束。
|
||||
|
||||
## 结论
|
||||
|
||||
基于您的需求文档和数据量级,我认为**合并成单一的events表是合理的**。这样的设计更简洁,满足需求的同时,降低了维护复杂度。
|
||||
|
||||
如果您的分析需求进一步复杂化,或者查询性能出现瓶颈,可以考虑使用ClickHouse的物化视图功能,在保持单一事件表的同时,为特定查询场景创建优化的视图。
|
||||
|
||||
您认为这个分析是否符合您的期望?是否需要进一步讨论特定的分析场景?
|
||||
Reference in New Issue
Block a user