cleaning up db
This commit is contained in:
98
backend/db/sql/clickhouse/ch-query.sh
Executable file
98
backend/db/sql/clickhouse/ch-query.sh
Executable file
@@ -0,0 +1,98 @@
|
||||
#!/bin/bash
|
||||
# 文件名: ch-query.sh
|
||||
# 用途: 执行ClickHouse SQL查询的便捷脚本
|
||||
|
||||
# 连接参数
|
||||
CH_HOST="localhost"
|
||||
CH_PORT="9000"
|
||||
CH_USER="admin"
|
||||
CH_PASSWORD="your_secure_password"
|
||||
CH_DATABASE="promote"
|
||||
|
||||
# 基本查询函数
|
||||
function ch_query() {
|
||||
clickhouse client --host $CH_HOST --port $CH_PORT --user $CH_USER --password $CH_PASSWORD --database $CH_DATABASE -q "$1"
|
||||
}
|
||||
|
||||
# 显示帮助信息
|
||||
function show_help() {
|
||||
echo "ClickHouse 查询工具"
|
||||
echo "用法: $0 [选项] [SQL查询]"
|
||||
echo ""
|
||||
echo "选项:"
|
||||
echo " -t 显示所有表"
|
||||
echo " -d <表名> 显示表结构"
|
||||
echo " -s <表名> 显示表样本数据(前10行)"
|
||||
echo " -c <表名> 计算表中的记录数"
|
||||
echo " -h, --help 显示此帮助信息"
|
||||
echo " -q \"SQL查询\" 执行自定义SQL查询"
|
||||
echo " -f <文件名> 执行SQL文件"
|
||||
echo ""
|
||||
echo "示例:"
|
||||
echo " $0 -t # 显示所有表"
|
||||
echo " $0 -d events # 显示events表结构"
|
||||
echo " $0 -q \"SELECT * FROM events LIMIT 5\" # 执行自定义查询"
|
||||
}
|
||||
|
||||
# 没有参数时显示帮助
|
||||
if [ $# -eq 0 ]; then
|
||||
show_help
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# 处理命令行参数
|
||||
case "$1" in
|
||||
-t)
|
||||
ch_query "SHOW TABLES"
|
||||
;;
|
||||
-d)
|
||||
if [ -z "$2" ]; then
|
||||
echo "错误: 需要提供表名"
|
||||
exit 1
|
||||
fi
|
||||
ch_query "DESCRIBE TABLE $2"
|
||||
;;
|
||||
-s)
|
||||
if [ -z "$2" ]; then
|
||||
echo "错误: 需要提供表名"
|
||||
exit 1
|
||||
fi
|
||||
ch_query "SELECT * FROM $2 LIMIT 10"
|
||||
;;
|
||||
-c)
|
||||
if [ -z "$2" ]; then
|
||||
echo "错误: 需要提供表名"
|
||||
exit 1
|
||||
fi
|
||||
ch_query "SELECT COUNT(*) FROM $2"
|
||||
;;
|
||||
-q)
|
||||
if [ -z "$2" ]; then
|
||||
echo "错误: 需要提供SQL查询"
|
||||
exit 1
|
||||
fi
|
||||
ch_query "$2"
|
||||
;;
|
||||
-f)
|
||||
if [ -z "$2" ]; then
|
||||
echo "错误: 需要提供SQL文件"
|
||||
exit 1
|
||||
fi
|
||||
if [ ! -f "$2" ]; then
|
||||
echo "错误: 文件 '$2' 不存在"
|
||||
exit 1
|
||||
fi
|
||||
SQL=$(cat "$2")
|
||||
ch_query "$SQL"
|
||||
;;
|
||||
-h|--help)
|
||||
show_help
|
||||
;;
|
||||
*)
|
||||
echo "未知选项: $1"
|
||||
show_help
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
exit 0
|
||||
4
backend/db/sql/clickhouse/clickhouse.md
Normal file
4
backend/db/sql/clickhouse/clickhouse.md
Normal file
@@ -0,0 +1,4 @@
|
||||
```bash
|
||||
alias clickhouse-sql='clickhouse client --host localhost --port 9000 --user admin --password your_secure_password --database promote -q'
|
||||
clickhouse-sql "SHOW TABLES"
|
||||
```
|
||||
251
backend/db/sql/clickhouse/create_events.sql
Normal file
251
backend/db/sql/clickhouse/create_events.sql
Normal file
@@ -0,0 +1,251 @@
|
||||
-- 删除旧表
|
||||
DROP TABLE IF EXISTS events;
|
||||
|
||||
DROP TABLE IF EXISTS mv_kol_performance;
|
||||
|
||||
DROP TABLE IF EXISTS mv_platform_distribution;
|
||||
|
||||
DROP TABLE IF EXISTS mv_sentiment_analysis;
|
||||
|
||||
DROP TABLE IF EXISTS mv_interaction_time;
|
||||
|
||||
DROP TABLE IF EXISTS mv_conversion_funnel;
|
||||
|
||||
-- 创建新的events表
|
||||
CREATE TABLE events (
|
||||
-- 基本信息
|
||||
event_id UUID DEFAULT generateUUIDv4(),
|
||||
timestamp DateTime DEFAULT now(),
|
||||
date Date DEFAULT toDate(now()),
|
||||
hour UInt8 DEFAULT toHour(now()),
|
||||
-- 实体关联
|
||||
user_id String,
|
||||
influencer_id String,
|
||||
content_id String,
|
||||
project_id String,
|
||||
-- 事件信息
|
||||
event_type Enum8(
|
||||
'view' = 1,
|
||||
-- 浏览
|
||||
'like' = 2,
|
||||
-- 点赞
|
||||
'unlike' = 3,
|
||||
-- 取消点赞
|
||||
'follow' = 4,
|
||||
-- 关注
|
||||
'unfollow' = 5,
|
||||
-- 取消关注
|
||||
'comment' = 6,
|
||||
-- 评论
|
||||
'share' = 7,
|
||||
-- 分享
|
||||
'click' = 8,
|
||||
-- 点击链接
|
||||
'impression' = 9,
|
||||
-- 曝光
|
||||
'purchase' = 10,
|
||||
-- 购买
|
||||
'signup' = 11 -- 注册
|
||||
),
|
||||
-- 转化漏斗
|
||||
funnel_stage Enum8(
|
||||
'exposure' = 1,
|
||||
-- 曝光
|
||||
'interest' = 2,
|
||||
-- 兴趣
|
||||
'consideration' = 3,
|
||||
-- 考虑
|
||||
'intent' = 4,
|
||||
-- 意向
|
||||
'evaluation' = 5,
|
||||
-- 评估
|
||||
'purchase' = 6 -- 购买
|
||||
),
|
||||
-- 内容信息
|
||||
platform String,
|
||||
-- 社交平台
|
||||
content_type Enum8(
|
||||
'video' = 1,
|
||||
'image' = 2,
|
||||
'text' = 3,
|
||||
'story' = 4,
|
||||
'reel' = 5,
|
||||
'live' = 6
|
||||
),
|
||||
content_status Enum8(
|
||||
-- 审核状态
|
||||
'approved' = 1,
|
||||
'pending' = 2,
|
||||
'rejected' = 3
|
||||
),
|
||||
-- 互动分析
|
||||
sentiment Enum8(
|
||||
-- 情感分析
|
||||
'positive' = 1,
|
||||
'neutral' = 2,
|
||||
'negative' = 3
|
||||
),
|
||||
comment_text String,
|
||||
-- 评论文本
|
||||
keywords Array(String),
|
||||
-- 关键词
|
||||
-- 数值指标
|
||||
interaction_value Float64,
|
||||
-- 互动价值
|
||||
followers_count UInt32,
|
||||
-- 粉丝数
|
||||
followers_change Int32,
|
||||
-- 粉丝变化量
|
||||
likes_count UInt32,
|
||||
-- 点赞数
|
||||
likes_change Int32,
|
||||
-- 点赞变化量
|
||||
views_count UInt32,
|
||||
-- 观看数
|
||||
-- 设备信息
|
||||
ip String,
|
||||
user_agent String,
|
||||
device_type String,
|
||||
referrer String,
|
||||
-- 地理信息
|
||||
geo_country String,
|
||||
geo_city String,
|
||||
-- 会话信息
|
||||
session_id String
|
||||
) ENGINE = MergeTree() PARTITION BY toYYYYMM(timestamp)
|
||||
ORDER BY
|
||||
(event_type, influencer_id, date, hour) SETTINGS index_granularity = 8192;
|
||||
|
||||
-- 创建物化视图:KOL表现概览
|
||||
CREATE MATERIALIZED VIEW mv_kol_performance ENGINE = SummingMergeTree() PARTITION BY toYYYYMM(date)
|
||||
ORDER BY
|
||||
(influencer_id, date) AS
|
||||
SELECT
|
||||
influencer_id,
|
||||
date,
|
||||
sum(if(event_type = 'follow', 1, 0)) - sum(if(event_type = 'unfollow', 1, 0)) AS new_followers,
|
||||
sum(if(event_type = 'like', 1, 0)) - sum(if(event_type = 'unlike', 1, 0)) AS new_likes,
|
||||
sum(if(event_type = 'view', 1, 0)) AS views,
|
||||
sum(if(event_type = 'comment', 1, 0)) AS comments,
|
||||
sum(if(event_type = 'share', 1, 0)) AS shares
|
||||
FROM
|
||||
events
|
||||
GROUP BY
|
||||
influencer_id,
|
||||
date;
|
||||
|
||||
-- 创建物化视图:平台分布
|
||||
CREATE MATERIALIZED VIEW mv_platform_distribution ENGINE = SummingMergeTree() PARTITION BY toYYYYMM(date)
|
||||
ORDER BY
|
||||
(platform, date) AS
|
||||
SELECT
|
||||
platform,
|
||||
date,
|
||||
count() AS events_count,
|
||||
uniqExact(user_id) AS unique_users,
|
||||
uniqExact(content_id) AS unique_contents
|
||||
FROM
|
||||
events
|
||||
GROUP BY
|
||||
platform,
|
||||
date;
|
||||
|
||||
-- 创建物化视图:情感分析
|
||||
CREATE MATERIALIZED VIEW mv_sentiment_analysis ENGINE = SummingMergeTree() PARTITION BY toYYYYMM(date)
|
||||
ORDER BY
|
||||
(sentiment, date) AS
|
||||
SELECT
|
||||
sentiment,
|
||||
date,
|
||||
count() AS count
|
||||
FROM
|
||||
events
|
||||
WHERE
|
||||
sentiment IS NOT NULL
|
||||
AND event_type = 'comment'
|
||||
GROUP BY
|
||||
sentiment,
|
||||
date;
|
||||
|
||||
-- 创建物化视图:用户互动时间
|
||||
CREATE MATERIALIZED VIEW mv_interaction_time ENGINE = SummingMergeTree() PARTITION BY toYYYYMM(date)
|
||||
ORDER BY
|
||||
(date, hour) AS
|
||||
SELECT
|
||||
date,
|
||||
hour,
|
||||
count() AS events_count,
|
||||
uniqExact(user_id) AS unique_users
|
||||
FROM
|
||||
events
|
||||
GROUP BY
|
||||
date,
|
||||
hour;
|
||||
|
||||
-- 创建物化视图:内容审核状态
|
||||
CREATE MATERIALIZED VIEW mv_content_status ENGINE = SummingMergeTree() PARTITION BY toYYYYMM(date)
|
||||
ORDER BY
|
||||
(content_status, date) AS
|
||||
SELECT
|
||||
content_status,
|
||||
date,
|
||||
count() AS count
|
||||
FROM
|
||||
events
|
||||
WHERE
|
||||
content_status IS NOT NULL
|
||||
GROUP BY
|
||||
content_status,
|
||||
date;
|
||||
|
||||
-- 创建物化视图:转化漏斗
|
||||
CREATE MATERIALIZED VIEW mv_conversion_funnel ENGINE = SummingMergeTree() PARTITION BY toYYYYMM(date)
|
||||
ORDER BY
|
||||
(funnel_stage, date) AS
|
||||
SELECT
|
||||
funnel_stage,
|
||||
date,
|
||||
count() AS stage_count,
|
||||
uniqExact(user_id) AS unique_users
|
||||
FROM
|
||||
events
|
||||
WHERE
|
||||
funnel_stage IS NOT NULL
|
||||
GROUP BY
|
||||
funnel_stage,
|
||||
date;
|
||||
|
||||
-- 创建物化视图:热门内容
|
||||
CREATE MATERIALIZED VIEW mv_popular_content ENGINE = SummingMergeTree() PARTITION BY toYYYYMM(date)
|
||||
ORDER BY
|
||||
(content_id, date) AS
|
||||
SELECT
|
||||
content_id,
|
||||
influencer_id,
|
||||
date,
|
||||
sum(if(event_type = 'view', 1, 0)) AS views,
|
||||
sum(if(event_type = 'like', 1, 0)) AS likes,
|
||||
sum(if(event_type = 'comment', 1, 0)) AS comments,
|
||||
sum(if(event_type = 'share', 1, 0)) AS shares
|
||||
FROM
|
||||
events
|
||||
GROUP BY
|
||||
content_id,
|
||||
influencer_id,
|
||||
date;
|
||||
|
||||
-- 创建物化视图:关键词分析
|
||||
CREATE MATERIALIZED VIEW mv_keywords ENGINE = SummingMergeTree() PARTITION BY toYYYYMM(date)
|
||||
ORDER BY
|
||||
(keyword, date) AS
|
||||
SELECT
|
||||
arrayJoin(keywords) AS keyword,
|
||||
date,
|
||||
count() AS frequency
|
||||
FROM
|
||||
events
|
||||
WHERE
|
||||
length(keywords) > 0
|
||||
GROUP BY
|
||||
keyword,
|
||||
date;
|
||||
Reference in New Issue
Block a user