根据提供的数据，我们可以从以下几个核心维度进行分析：
import numpy as np
from scipy.stats import spearmanr, pearsonr
import pandas as pd
# Assumes `df` is a DataFrame that already contains all of the data; the code
# below reads its '引流人数', '销售额' and '粉丝数' columns.

# Index labels of the three rows with the largest '引流人数' values.
top3_rows = df.nlargest(3, '引流人数')
top3_indices = top3_rows.index.tolist()

# Spearman rank correlation between '引流人数' and '销售额'; the second
# returned value is discarded.
correlation, _ = spearmanr(df['引流人数'], df['销售额'])
print(f"Spearman Correlation: {correlation}")

# Percentage of the summed '粉丝数' column contributed by those three rows.
# NOTE(review): the printed label reads "TOP3引流比例", yet the ratio is built
# from '粉丝数' rather than '引流人数' -- confirm which column is intended.
fans = df['粉丝数']
total_fans = fans.sum()
top3_total_fans = fans.loc[top3_indices].sum()
top3_share = top3_total_fans / total_fans
top3引流比例 = top3_share * 100
print(f"TOP3引流比例: {top3引流比例:.2f}%")
# Scatter plot of the '粉丝数' column (x axis) against the '引流人数' column
# (y axis) of `df`, with English title and axis labels.
# `plt` is used for the title, labels and show() below, so pyplot must be
# imported here; without it the first plt.* call raises NameError.
import matplotlib.pyplot as plt
import seaborn as sns

sns.scatterplot(x='粉丝数', y='引流人数', data=df)
plt.title('Fan Count vs. Traffic')
plt.xlabel('Number of Fans')
plt.ylabel('Number of Visitors from Short Videos')
plt.show()
# Linear regression model: fit '引流人数' (target) against '粉丝数' (feature)
# and print the fitted slope and intercept to four decimal places.
from sklearn.linear_model import LinearRegression

# reshape(-1, 1) turns the 1-D column values into a single-column 2-D array.
X = df['粉丝数'].values.reshape(-1, 1)
y = df['引流人数']

model = LinearRegression()
model.fit(X, y)

print(f"回归方程: y = {model.coef_[0]:.4f} * X + {model.intercept_:.4f}")
以上分析数据来源：互联岛