-- Analytics schema (ClickHouse, MergeTree family).
-- Every statement uses IF NOT EXISTS, so the script can be re-run safely;
-- note that changed defaults only apply to tables created after the change.

-- events: one row per tracked interaction event.
CREATE TABLE IF NOT EXISTS events
(
    -- Basic information
    event_id UUID DEFAULT generateUUIDv4(),
    timestamp DateTime DEFAULT now(),
    -- date/hour are derived from the row's own timestamp (not from insert
    -- time) so that backfilled rows stay consistent with the partition key
    -- toYYYYMM(timestamp) and with the (date, hour) part of the sorting key.
    date Date DEFAULT toDate(timestamp),
    hour UInt8 DEFAULT toHour(timestamp),

    -- Entity references
    user_id String,
    influencer_id String,
    content_id String,
    project_id String,

    -- Event information
    event_type Enum8(
        'view' = 1,        -- view
        'like' = 2,        -- like
        'unlike' = 3,      -- remove like
        'follow' = 4,      -- follow
        'unfollow' = 5,    -- unfollow
        'comment' = 6,     -- comment
        'share' = 7,       -- share
        'click' = 8,       -- link click
        'impression' = 9,  -- impression
        'purchase' = 10,   -- purchase
        'signup' = 11      -- sign-up
    ),

    -- Conversion funnel
    funnel_stage Enum8(
        'exposure' = 1,       -- exposure
        'interest' = 2,       -- interest
        'consideration' = 3,  -- consideration
        'intent' = 4,         -- intent
        'evaluation' = 5,     -- evaluation
        'purchase' = 6        -- purchase
    ),

    -- Content information
    platform String,  -- social platform
    content_type Enum8(
        'video' = 1,
        'image' = 2,
        'text' = 3,
        'story' = 4,
        'reel' = 5,
        'live' = 6
    ),
    content_status Enum8(  -- moderation status
        'approved' = 1,
        'pending' = 2,
        'rejected' = 3
    ),

    -- Interaction analysis
    sentiment Enum8(  -- sentiment analysis result
        'positive' = 1,
        'neutral' = 2,
        'negative' = 3
    ),
    comment_text String,     -- comment text
    keywords Array(String),  -- keywords

    -- Numeric metrics
    interaction_value Float64,  -- interaction value
    followers_count UInt32,     -- follower count
    followers_change Int32,     -- change in follower count
    likes_count UInt32,         -- like count
    likes_change Int32,         -- change in like count
    views_count UInt32,         -- view count

    -- Device information
    ip String,
    user_agent String,
    device_type String,
    referrer String,

    -- Geo information
    geo_country String,
    geo_city String,

    -- Session information
    session_id String
)
ENGINE = MergeTree()
PARTITION BY toYYYYMM(timestamp)
ORDER BY (event_type, influencer_id, date, hour)
SETTINGS index_granularity = 8192;

-- influencers: influencer records.
-- NOTE(review): no explicit ORDER BY is given; the table relies on the
-- PRIMARY KEY declaration for its sorting key -- confirm this is accepted by
-- the target ClickHouse version.
CREATE TABLE IF NOT EXISTS influencers
(
    influencer_id String,
    name String,
    platform String,
    profile_url String,
    project_id String,
    date Date DEFAULT toDate(now()),
    followers UInt32 DEFAULT 0,
    PRIMARY KEY (influencer_id)
)
ENGINE = MergeTree();

-- posts: post records with their engagement counters.
-- NOTE(review): same PRIMARY KEY-only layout as influencers (no ORDER BY).
CREATE TABLE IF NOT EXISTS posts
(
    post_id String,
    title String,
    influencer_id String,
    project_id String,
    platform String,
    type String DEFAULT 'post',
    format String DEFAULT 'text',
    -- NOTE(review): date defaults to insert time rather than being derived
    -- from timestamp/created_at; left unchanged because it is unclear which
    -- of the two columns it should follow -- confirm.
    date Date DEFAULT toDate(now()),
    timestamp DateTime DEFAULT now(),
    created_at DateTime DEFAULT now(),
    views UInt32 DEFAULT 0,
    likes UInt32 DEFAULT 0,
    comments UInt32 DEFAULT 0,
    shares UInt32 DEFAULT 0,
    PRIMARY KEY (post_id)
)
ENGINE = MergeTree();

-- sync_logs: one row per synchronization run, sorted by timestamp.
CREATE TABLE IF NOT EXISTS sync_logs
(
    timestamp DateTime DEFAULT now(),
    duration_ms UInt64,
    posts_synced UInt32,
    comments_synced UInt32,
    influencer_changes_synced UInt32,
    projects_synced UInt32,
    success UInt8,
    error_messages String
)
ENGINE = MergeTree()
ORDER BY (timestamp);

-- comments: dedicated table for storing comment data.
CREATE TABLE IF NOT EXISTS comments
(
    -- Basic identifiers
    comment_id String,  -- unique comment ID
    post_id String,     -- ID of the post the comment belongs to
    user_id String,     -- ID of the user who wrote the comment

    -- Time information
    created_at DateTime DEFAULT now(),      -- comment creation time
    date Date DEFAULT toDate(created_at),   -- date (used for partitioning)

    -- Comment content
    content String,           -- comment body
    sentiment_score Float64,  -- sentiment score (-1 to 1)
    sentiment Enum8(          -- sentiment class
        'positive' = 1,
        'neutral' = 2,
        'negative' = 3
    ),

    -- Related entities
    project_id String,     -- project ID
    influencer_id String,  -- influencer ID
    platform String,       -- platform the comment was posted on

    -- Interaction information
    likes_count UInt32 DEFAULT 0,    -- number of likes
    replies_count UInt32 DEFAULT 0,  -- number of replies

    -- Metadata
    parent_comment_id String DEFAULT '',  -- parent comment ID (for replies)
    is_reply UInt8 DEFAULT 0,             -- is a reply (0 = no, 1 = yes)

    -- Sync information
    is_synced UInt8 DEFAULT 1,            -- already synced (0 = no, 1 = yes)
    last_updated DateTime DEFAULT now(),  -- last update time

    -- Analysis information
    keywords Array(String),  -- extracted keywords
    topics Array(String),    -- associated topics

    -- Internal processing information
    is_active UInt8 DEFAULT 1,  -- active (0 = deleted/hidden, 1 = active)
    is_spam UInt8 DEFAULT 0     -- spam comment (0 = no, 1 = yes)
)
ENGINE = MergeTree()
PARTITION BY toYYYYMM(date)
ORDER BY (post_id, created_at, comment_id)
SETTINGS index_granularity = 8192;