本实践练习演示无监督学习方法的应用。你将使用 scikit-learn 实现 K-Means 和 DBSCAN 进行聚类,并在一个生成的数据集上应用一种常用技术进行异常检测。这种动手实践将有助于巩固你对这些算法如何工作以及如何解释其结果的理解。我们首先生成一个模拟数据集。我们将使用 scikit-learn 中的 make_blobs 创建彼此分离的点群,然后加入一些随机散布的点,这些点可以看作是离群值或噪声。import numpy as np import pandas as pd from sklearn.datasets import make_blobs from sklearn.preprocessing import StandardScaler import plotly.express as px # 生成样本数据 X, y_true = make_blobs(n_samples=400, centers=4, cluster_std=0.80, random_state=42) # 添加一些远离聚类的噪声点 rng = np.random.RandomState(42) n_outliers = 30 outliers = rng.uniform(low=np.min(X) - 5, high=np.max(X) + 5, size=(n_outliers, 2)) X = np.vstack([X, outliers]) # 对特征进行标准化,因为 K-Means 和 DBSCAN 等算法对尺度敏感 scaler = StandardScaler() X_scaled = scaler.fit_transform(X) # 创建 DataFrame 以便可视化 df = pd.DataFrame(X_scaled, columns=['Feature 1', 'Feature 2']) # 数据的初步可视化 fig_initial = px.scatter(df, x='Feature 1', y='Feature 2', title='Synthetic Dataset with Potential Outliers', color_discrete_sequence=['#495057']) # 对未聚类的点使用灰色 fig_initial.update_layout(showlegend=False) # fig_initial.show() # 在 Python 环境中显示图表{"layout": {"title": {"text": "带有潜在离群值的模拟数据集"}, "xaxis": {"title": {"text": "特征 1"}}, "yaxis": {"title": {"text": "特征 2"}}, "colorway": ["#495057"], "showlegend": false, "template": "plotly_white"}, "data": [{"type": "scatter", "x": [-0.1407, -1.0994, 1.0277, -1.0999, 0.5868, -0.9657, 1.3333, 0.9139, -0.5425, 1.0503, -0.9569, -0.8658, 1.4888, -0.5704, 1.2312, 0.5124, 0.7864, -1.294, -0.9789, -1.2406, -1.2164, 1.1359, 0.7019, 1.6568, -1.101, -0.2347, 1.0796, -0.1379, 0.3974, -1.0173, -1.0665, 0.871, 0.3669, -0.472, -1.1459, 0.9665, 0.9813, -1.0097, -0.7846, 0.5722, -1.3466, -1.0766, 0.9912, -0.5746, -1.184, -1.4592, 1.4559, 0.8112, 0.9221, 0.6401, 1.106, -0.853, 0.5223, -0.196, -1.0974, 1.1218, 0.8076, -1.3195, -0.4492, -1.4464, 1.0315, 1.2396, 0.9347, -1.0508, -1.2227, -0.4981, -1.2317, -0.178, -0.4956, -1.2049, 0.653, 1.3812, -1.2812, -0.9394, -0.3137, -0.026, 0.4793, -1.1917, 0.579, 0.5904, -1.4075, 1.2042, 
1.3355, -1.4301, 1.1796, 0.9514, 0.9065, 1.0447, -0.9378, 1.2249, 0.8301, 0.3932, -0.1822, -1.1175, 1.1579, 0.8179, -0.2471, 0.954, 1.2973, -0.1876, -0.7474, 1.2337, 0.6645, 0.5167, 0.343, -0.4198, -1.3238, -0.4518, -0.8374, 0.8626, 1.2979, -0.9468, -0.966, 0.6049, -0.297, 0.7823, 0.8005, 1.2691, 1.2789, -0.4227, -1.1512, 1.1617, -1.0609, -0.4972, -1.0459, -1.1928, -1.2507, -0.404, -1.1426, 0.7135, 1.038, 1.1142, 0.8662, 0.953, 0.875, -1.1291, 0.8807, -0.1804, 0.6456, -0.4304, 1.0115, 1.2328, 0.9183, -1.0549, -1.0544, 0.8762, 0.9617, 1.412, 0.9029, 1.3752, 1.0726, -1.1757, -1.2255, 0.6447, -1.0514, -0.8675, 0.724, -1.0342, -1.2793, -1.3291, -1.0629, -1.0499, 0.7652, 1.1709, 0.817, 0.8323, -0.627, -1.018, 1.0299, 1.2619, 0.9741, -1.1077, -0.9245, 1.3683, 0.8466, -0.3231, 0.5547, -0.2757, -0.786, 0.8479, -1.2604, 1.1134, 1.3738, -1.2446, 1.2693, 0.7628, -0.413, -0.3673, 1.1481, -0.5984, -0.857, 1.2557, -1.3063, -1.4615, -1.1838, 1.0693, -1.1197, 0.9167, -0.3066, 1.1313, 0.7927, -0.9761, 0.5715, -1.0618, -0.8806, -0.5839, 0.9319, 1.003, 0.8129, 1.068, 1.0767, 0.6772, -0.8908, -0.5365, -1.022, 1.1591, 0.8086, 1.3369, -0.9094, -1.1797, 0.9738, 1.099, -1.1366, 0.9422, 1.1731, 1.0436, 1.272, -1.2634, -0.5254, -0.5075, -0.8034, -0.9109, 1.0501, 0.9625, -1.0425, -0.8976, -1.2641, 1.2188, 1.0554, -0.9723, -1.2454, 0.8943, -1.3716, 0.9681, -0.963, 0.8296, -1.3439, -0.5657, -0.5068, -0.3491, 0.6947, -0.5227, -0.9843, 0.5515, 0.9467, 0.9095, 1.0409, -0.7745, 1.2774, -0.4679, 1.0725, -0.5888, -1.1504, 0.9305, -1.2354, 0.5948, -1.1913, 1.2305, 1.4638, -1.1734, 0.7548, -1.3545, 1.225, 1.0042, 1.2468, -1.2765, -0.9975, 1.1996, -0.3678, 1.0236, -1.1804, -1.1794, 1.0052, -0.8974, -1.0682, -1.2588, -0.5266, -0.3957, 0.9817, 1.0017, -0.5352, -0.4289, 1.2081, 1.1471, -1.0053, 1.1611, 1.0302, 0.871, -1.1139, 0.7779, -1.0842, -1.1987, -0.3755, -0.4483, -1.2899, -0.5117, 0.7213, -0.2189, 0.9803, -1.241, -1.0553, -0.7993, 1.058, 0.812, 0.737, 1.0485, 0.8861, 1.1864, 0.7884, -1.1194, -1.156, 
-0.5524, -1.223, -1.2573, 0.6275, -0.903, 0.8745, -0.8966, 1.3016, -0.9358, 0.7795, 0.559, 0.6593, 0.9051, -1.365, 0.8671, -0.3485, 1.1828, 0.9904, -0.4873, -0.4018, 1.0078, -0.3441, -0.8616, 0.5845, -1.0916, -1.068, 0.8667, -0.3337, 0.6409, 0.9494, 0.9515, 1.3517, -0.5303, 1.1751, -0.5874, -1.0161, -1.1064, -0.4131, 0.691, 0.6861, 0.9201, -0.8531, -1.1725, -1.0665, 0.7191, -1.3018, -1.1861, -0.563, 1.2947, 0.9699, 1.1703, 1.3062, -1.2465, 0.8114, -1.1754, 1.177, -0.9338, 0.6174, -1.1542, -1.1445, 1.0532, -1.2332, 1.1821, 1.1137, 0.9939, -0.985, 1.0243, -0.4958, -0.3992, 0.9532, 1.1678, 0.6459, 1.2372, 1.1185, 1.3594, -0.4372, 0.8968, 0.6877, -1.1731, -1.1971, 0.9246, -1.3562, -1.0278, 0.8815, 1.7774, 0.201, 0.8197, -1.6969, -0.5767, 1.2943, 1.9177, -2.7636, -0.8424, -0.1145, -1.7681, -2.7093, 1.9856, -0.8059, 1.6636, 1.3533, -0.3615, 2.6857, -1.8161, 2.4043, 0.0361, 2.8059, -1.7882, -1.0587, -2.059, 1.6951, -0.7022, 2.4761, 0.3833, 2.0588], "y": [-0.8666, -1.3076, 1.1466, 0.4787, -0.8475, -1.2978, 0.6605, 0.8718, 0.714, 0.4256, -0.8196, -1.2232, 0.4811, 0.953, 0.4931, -1.3832, 0.7797, -1.331, 0.4578, -0.9236, -1.1063, 1.2174, 0.9948, 1.2829, 0.4216, 0.6066, 0.4783, -0.7977, -0.9061, -1.0119, 0.6711, 1.0054, 0.1427, 1.0733, -0.9633, 0.7437, 0.491, -0.9161, -1.2534, -1.1416, -1.1548, -1.1194, 0.8288, 0.7664, 0.5591, 0.3396, 0.8146, 0.9518, 0.7766, -0.8617, 0.9247, -0.8838, -1.2205, 0.9336, 0.3487, 0.9102, 0.5091, -1.0738, 1.0237, -0.9905, 1.0576, 0.6369, 0.6352, 0.4794, 0.5595, 0.8968, -0.7779, -0.9835, 1.2515, 0.5617, -0.9929, 0.9237, -1.1041, -0.8775, 0.8621, -0.6849, 0.6546, -1.0216, -1.1289, -0.9066, 0.2926, 0.9491, -0.9869, 0.9693, 0.6341, 0.6737, 1.053, 0.6279, -1.1446, 0.6207, 0.7901, -1.227, 0.7906, -1.0079, 0.8611, 0.9608, 0.8524, 0.6033, 0.9035, 0.6524, -1.1019, 0.588, 0.662, -1.1907, -0.9544, 0.9727, -1.2646, 1.1534, 0.8496, 0.4639, -0.838, 1.0923, 0.823, -0.9584, -0.8054, -1.0062, -1.0041, 0.7601, 1.0145, 1.1052, -1.1101, 0.8728, 0.3247, -1.106, 0.5216, 
0.8813, 0.8996, 0.8193, 0.457, -1.0145, 0.4635, 0.6285, 0.4733, 0.9403, 0.6084, 1.1098, -0.9653, -1.0408, 1.0144, 0.6763, 0.5978, -1.2139, 0.5135, -0.8991, -0.9251, -0.9663, -1.0803, 1.0567, 0.7046, 0.658, 1.1024, 0.9846, 0.5846, 0.4874, 0.5805, 0.6397, 0.6516, 0.8132, -0.9473, -0.784, -0.8091, 0.6908, 0.5801, -1.2028, -0.8661, -1.2063, 0.7175, 0.9038, -1.1497, 0.5951, -0.9887, 0.6929, -0.975, -0.8862, -0.911, 0.7555, 0.7327, 0.5639, 0.8366, -0.7963, -0.9452, 0.8443, 0.592, 0.6781, 0.5405, 0.952, -1.0689, 0.7107, -0.9613, 0.844, -1.0717, 0.8914, 1.1217, -0.8121, 0.8773, -0.8166, 0.836, 0.9099, 0.8133, -1.1503, 0.9635, 0.6772, -1.0588, 0.8235, -0.9308, -0.745, 1.0284, -1.2506, 0.4704, -0.974, 1.0008, 1.0714, 1.0788, 1.2273, 0.8939, -1.0659, 0.6827, 1.0467, 0.797, 0.6862, 0.7885, -0.8131, 0.6264, 0.6088, -1.0091, -1.2596, 0.536, 0.7983, -1.1136, 0.826, 0.7737, 0.9269, 0.5285, -1.0536, -1.069, -1.1664, 1.1098, 1.0448, -0.9114, 0.5947, -1.1356, -0.9871, -0.8602, -1.2562, -1.0156, -0.9124, 0.8714, 0.6371, -0.8954, 0.6962, 0.8474, -1.0044, -1.1812, -0.8671, -1.2392, 0.784, 1.0565, 0.9457, 1.2709, -1.0251, 0.534, -0.8715, 0.9925, 1.2658, -1.152, 0.7166, 0.4394, -1.0566, -1.1288, 0.6154, 0.6149, -0.9808, -1.0642, -1.1407, 0.6211, 0.5483, 0.752, -1.0223, 0.5054, 0.7415, -0.8241, -1.1274, 0.9468, 1.0843, 0.906, 0.9415, 0.4671, 0.6138, 0.5739, -1.1624, -0.8083, 1.0958, 0.7062, 0.8619, 0.7824, -1.1197, 1.0712, -0.8046, 0.4209, -1.063, -1.0467, 1.0466, 0.5681, 0.8338, 0.8469, 1.0606, -0.8658, 0.5806, -0.8557, -1.1336, 1.1908, 0.6739, 0.8456, 1.0095, -1.2388, 1.0619, 0.968, 0.9672, -1.0416, -1.1241, 0.7364, -0.9385, 0.759, -1.1857, 0.9836, 0.7427, -0.8048, -1.1141, 0.6893, -1.0811, 0.4271, 0.9466, -1.1202, 0.5478, 0.8205, 0.641, 0.8387, -0.8243, 0.5748, 0.6667, 0.8304, 0.8808, -1.0283, 1.0995, -0.9389, 0.8828, -0.8715, -1.0821, 0.494, 0.674, 0.8062, 1.0601, 0.9894, 0.9091, 0.6143, 0.6635, 1.0328, 0.9499, 0.6412, -1.0721, 0.8737, -1.0907, 0.8763, 0.8912, -1.0699, -1.0579, 0.5113, 
0.6379, -1.1604, 0.5059, 0.9606, 1.0291, 0.7514, 0.8672, 0.7508, 0.9982, -0.9846, -1.0452, 0.938, -1.0694, 0.4534, 0.6177, 0.987, 0.6557, 0.4082, -0.7897, 0.8985, 0.8507, 0.6348, 1.2741, -1.0649, -0.7699, 1.1993, -0.9392, -1.3995, -1.1145, 0.5012, -0.2276, -0.8575, 1.863, -0.7261, 1.1205, -1.1653, 2.6689, -1.042, 0.3852, -0.436, -1.5648, 0.4107, 0.3205, -2.7582, 1.776, -0.5487, 2.7825, 0.9804, 0.0688, -1.9431, -2.5416, 0.1559], "mode": "markers", "marker": {"color": "#495057"}}]}特征缩放后生成数据集的初始散点图。离散的群组清晰可见,同时还有一些散布的点。应用 K-Means 聚类K-Means 的目标是将数据划分成 $k$ 个离散、不重叠的聚类。每个数据点都属于距离其最近的均值(聚类中心)所在的聚类。我们需要预先指定聚类的数量 $k$。根据可视化结果(以及我们生成数据的方式),$k=4$ 似乎是一个合理的起始点。from sklearn.cluster import KMeans # 实例化并拟合 K-Means kmeans = KMeans(n_clusters=4, random_state=42, n_init=10) # 显式指定 n_init(10 或 'auto'),以免结果受不同 scikit-learn 版本默认值变化的影响 kmeans.fit(X_scaled) # 获取聚类分配和聚类中心 df['KMeans Cluster'] = kmeans.labels_.astype(str) # 转换为字符串以便使用离散颜色 centroids = scaler.inverse_transform(kmeans.cluster_centers_) # 将聚类中心转换回原始比例(仅供参考,下方绘图并未使用) # 可视化 K-Means 结果 fig_kmeans = px.scatter(df, x='Feature 1', y='Feature 2', color='KMeans Cluster', title='K-Means Clustering Results (k=4)', color_discrete_sequence=px.colors.qualitative.Pastel) # 使用一种美观的颜色序列 # 将聚类中心添加到图表中(直接使用缩放空间中的 kmeans.cluster_centers_,与散点图的坐标一致) fig_kmeans.add_scatter(x=kmeans.cluster_centers_[:, 0], y=kmeans.cluster_centers_[:, 1], mode='markers', marker=dict(color='#d6336c', size=12, symbol='x'), name='聚类中心') # fig_kmeans.show(){"layout": {"title": {"text": "K-Means 聚类结果 (k=4)"}, "xaxis": {"title": {"text": "特征 1"}}, "yaxis": {"title": {"text": "特征 2"}}, "coloraxis": {"colorbar": {"title": {"text": "KMeans 聚类"}}}, "color_discrete_sequence": ["#AEC7E8", "#FFBB78", "#98DF8A", "#FF9896"], "legend": {"traceorder": "reversed"}, "template": "plotly_white"}, "data": [{"type": "scatter", "x": [-0.1407, -1.0994, 1.0277, -1.0999, 0.5868, -0.9657, 1.3333, 0.9139, -0.5425, 1.0503, -0.9569, -0.8658, 1.4888, -0.5704, 1.2312, 0.5124, 0.7864, -1.294, -0.9789, -1.2406, -1.2164, 1.1359, 0.7019, 1.6568, -1.101, -0.2347, 1.0796, -0.1379, 
0.3974, -1.0173, -1.0665, 0.871, 0.3669, -0.472, -1.1459, 0.9665, 0.9813, -1.0097, -0.7846, 0.5722, -1.3466, -1.0766, 0.9912, -0.5746, -1.184, -1.4592, 1.4559, 0.8112, 0.9221, 0.6401, 1.106, -0.853, 0.5223, -0.196, -1.0974, 1.1218, 0.8076, -1.3195, -0.4492, -1.4464, 1.0315, 1.2396, 0.9347, -1.0508, -1.2227, -0.4981, -1.2317, -0.178, -0.4956, -1.2049, 0.653, 1.3812, -1.2812, -0.9394, -0.3137, -0.026, 0.4793, -1.1917, 0.579, 0.5904, -1.4075, 1.2042, 1.3355, -1.4301, 1.1796, 0.9514, 0.9065, 1.0447, -0.9378, 1.2249, 0.8301, 0.3932, -0.1822, -1.1175, 1.1579, 0.8179, -0.2471, 0.954, 1.2973, -0.1876, -0.7474, 1.2337, 0.6645, 0.5167, 0.343, -0.4198, -1.3238, -0.4518, -0.8374, 0.8626, 1.2979, -0.9468, -0.966, 0.6049, -0.297, 0.7823, 0.8005, 1.2691, 1.2789, -0.4227, -1.1512, 1.1617, -1.0609, -0.4972, -1.0459, -1.1928, -1.2507, -0.404, -1.1426, 0.7135, 1.038, 1.1142, 0.8662, 0.953, 0.875, -1.1291, 0.8807, -0.1804, 0.6456, -0.4304, 1.0115, 1.2328, 0.9183, -1.0549, -1.0544, 0.8762, 0.9617, 1.412, 0.9029, 1.3752, 1.0726, -1.1757, -1.2255, 0.6447, -1.0514, -0.8675, 0.724, -1.0342, -1.2793, -1.3291, -1.0629, -1.0499, 0.7652, 1.1709, 0.817, 0.8323, -0.627, -1.018, 1.0299, 1.2619, 0.9741, -1.1077, -0.9245, 1.3683, 0.8466, -0.3231, 0.5547, -0.2757, -0.786, 0.8479, -1.2604, 1.1134, 1.3738, -1.2446, 1.2693, 0.7628, -0.413, -0.3673, 1.1481, -0.5984, -0.857, 1.2557, -1.3063, -1.4615, -1.1838, 1.0693, -1.1197, 0.9167, -0.3066, 1.1313, 0.7927, -0.9761, 0.5715, -1.0618, -0.8806, -0.5839, 0.9319, 1.003, 0.8129, 1.068, 1.0767, 0.6772, -0.8908, -0.5365, -1.022, 1.1591, 0.8086, 1.3369, -0.9094, -1.1797, 0.9738, 1.099, -1.1366, 0.9422, 1.1731, 1.0436, 1.272, -1.2634, -0.5254, -0.5075, -0.8034, -0.9109, 1.0501, 0.9625, -1.0425, -0.8976, -1.2641, 1.2188, 1.0554, -0.9723, -1.2454, 0.8943, -1.3716, 0.9681, -0.963, 0.8296, -1.3439, -0.5657, -0.5068, -0.3491, 0.6947, -0.5227, -0.9843, 0.5515, 0.9467, 0.9095, 1.0409, -0.7745, 1.2774, -0.4679, 1.0725, -0.5888, -1.1504, 0.9305, -1.2354, 0.5948, -1.1913, 
1.2305, 1.4638, -1.1734, 0.7548, -1.3545, 1.225, 1.0042, 1.2468, -1.2765, -0.9975, 1.1996, -0.3678, 1.0236, -1.1804, -1.1794, 1.0052, -0.8974, -1.0682, -1.2588, -0.5266, -0.3957, 0.9817, 1.0017, -0.5352, -0.4289, 1.2081, 1.1471, -1.0053, 1.1611, 1.0302, 0.871, -1.1139, 0.7779, -1.0842, -1.1987, -0.3755, -0.4483, -1.2899, -0.5117, 0.7213, -0.2189, 0.9803, -1.241, -1.0553, -0.7993, 1.058, 0.812, 0.737, 1.0485, 0.8861, 1.1864, 0.7884, -1.1194, -1.156, -0.5524, -1.223, -1.2573, 0.6275, -0.903, 0.8745, -0.8966, 1.3016, -0.9358, 0.7795, 0.559, 0.6593, 0.9051, -1.365, 0.8671, -0.3485, 1.1828, 0.9904, -0.4873, -0.4018, 1.0078, -0.3441, -0.8616, 0.5845, -1.0916, -1.068, 0.8667, -0.3337, 0.6409, 0.9494, 0.9515, 1.3517, -0.5303, 1.1751, -0.5874, -1.0161, -1.1064, -0.4131, 0.691, 0.6861, 0.9201, -0.8531, -1.1725, -1.0665, 0.7191, -1.3018, -1.1861, -0.563, 1.2947, 0.9699, 1.1703, 1.3062, -1.2465, 0.8114, -1.1754, 1.177, -0.9338, 0.6174, -1.1542, -1.1445, 1.0532, -1.2332, 1.1821, 1.1137, 0.9939, -0.985, 1.0243, -0.4958, -0.3992, 0.9532, 1.1678, 0.6459, 1.2372, 1.1185, 1.3594, -0.4372, 0.8968, 0.6877, -1.1731, -1.1971, 0.9246, -1.3562, -1.0278, 0.8815, 1.7774, 0.201, 0.8197, -1.6969, -0.5767, 1.2943, 1.9177, -2.7636, -0.8424, -0.1145, -1.7681, -2.7093, 1.9856, -0.8059, 1.6636, 1.3533, -0.3615, 2.6857, -1.8161, 2.4043, 0.0361, 2.8059, -1.7882, -1.0587, -2.059, 1.6951, -0.7022, 2.4761, 0.3833, 2.0588], "y": [-0.8666, -1.3076, 1.1466, 0.4787, -0.8475, -1.2978, 0.6605, 0.8718, 0.714, 0.4256, -0.8196, -1.2232, 0.4811, 0.953, 0.4931, -1.3832, 0.7797, -1.331, 0.4578, -0.9236, -1.1063, 1.2174, 0.9948, 1.2829, 0.4216, 0.6066, 0.4783, -0.7977, -0.9061, -1.0119, 0.6711, 1.0054, 0.1427, 1.0733, -0.9633, 0.7437, 0.491, -0.9161, -1.2534, -1.1416, -1.1548, -1.1194, 0.8288, 0.7664, 0.5591, 0.3396, 0.8146, 0.9518, 0.7766, -0.8617, 0.9247, -0.8838, -1.2205, 0.9336, 0.3487, 0.9102, 0.5091, -1.0738, 1.0237, -0.9905, 1.0576, 0.6369, 0.6352, 0.4794, 0.5595, 0.8968, -0.7779, -0.9835, 1.2515, 0.5617, 
-0.9929, 0.9237, -1.1041, -0.8775, 0.8621, -0.6849, 0.6546, -1.0216, -1.1289, -0.9066, 0.2926, 0.9491, -0.9869, 0.9693, 0.6341, 0.6737, 1.053, 0.6279, -1.1446, 0.6207, 0.7901, -1.227, 0.7906, -1.0079, 0.8611, 0.9608, 0.8524, 0.6033, 0.9035, 0.6524, -1.1019, 0.588, 0.662, -1.1907, -0.9544, 0.9727, -1.2646, 1.1534, 0.8496, 0.4639, -0.838, 1.0923, 0.823, -0.9584, -0.8054, -1.0062, -1.0041, 0.7601, 1.0145, 1.1052, -1.1101, 0.8728, 0.3247, -1.106, 0.5216, 0.8813, 0.8996, 0.8193, 0.457, -1.0145, 0.4635, 0.6285, 0.4733, 0.9403, 0.6084, 1.1098, -0.9653, -1.0408, 1.0144, 0.6763, 0.5978, -1.2139, 0.5135, -0.8991, -0.9251, -0.9663, -1.0803, 1.0567, 0.7046, 0.658, 1.1024, 0.9846, 0.5846, 0.4874, 0.5805, 0.6397, 0.6516, 0.8132, -0.9473, -0.784, -0.8091, 0.6908, 0.5801, -1.2028, -0.8661, -1.2063, 0.7175, 0.9038, -1.1497, 0.5951, -0.9887, 0.6929, -0.975, -0.8862, -0.911, 0.7555, 0.7327, 0.5639, 0.8366, -0.7963, -0.9452, 0.8443, 0.592, 0.6781, 0.5405, 0.952, -1.0689, 0.7107, -0.9613, 0.844, -1.0717, 0.8914, 1.1217, -0.8121, 0.8773, -0.8166, 0.836, 0.9099, 0.8133, -1.1503, 0.9635, 0.6772, -1.0588, 0.8235, -0.9308, -0.745, 1.0284, -1.2506, 0.4704, -0.974, 1.0008, 1.0714, 1.0788, 1.2273, 0.8939, -1.0659, 0.6827, 1.0467, 0.797, 0.6862, 0.7885, -0.8131, 0.6264, 0.6088, -1.0091, -1.2596, 0.536, 0.7983, -1.1136, 0.826, 0.7737, 0.9269, 0.5285, -1.0536, -1.069, -1.1664, 1.1098, 1.0448, -0.9114, 0.5947, -1.1356, -0.9871, -0.8602, -1.2562, -1.0156, -0.9124, 0.8714, 0.6371, -0.8954, 0.6962, 0.8474, -1.0044, -1.1812, -0.8671, -1.2392, 0.784, 1.0565, 0.9457, 1.2709, -1.0251, 0.534, -0.8715, 0.9925, 1.2658, -1.152, 0.7166, 0.4394, -1.0566, -1.1288, 0.6154, 0.6149, -0.9808, -1.0642, -1.1407, 0.6211, 0.5483, 0.752, -1.0223, 0.5054, 0.7415, -0.8241, -1.1274, 0.9468, 1.0843, 0.906, 0.9415, 0.4671, 0.6138, 0.5739, -1.1624, -0.8083, 1.0958, 0.7062, 0.8619, 0.7824, -1.1197, 1.0712, -0.8046, 0.4209, -1.063, -1.0467, 1.0466, 0.5681, 0.8338, 0.8469, 1.0606, -0.8658, 0.5806, -0.8557, -1.1336, 1.1908, 
0.6739, 0.8456, 1.0095, -1.2388, 1.0619, 0.968, 0.9672, -1.0416, -1.1241, 0.7364, -0.9385, 0.759, -1.1857, 0.9836, 0.7427, -0.8048, -1.1141, 0.6893, -1.0811, 0.4271, 0.9466, -1.1202, 0.5478, 0.8205, 0.641, 0.8387, -0.8243, 0.5748, 0.6667, 0.8304, 0.8808, -1.0283, 1.0995, -0.9389, 0.8828, -0.8715, -1.0821, 0.494, 0.674, 0.8062, 1.0601, 0.9894, 0.9091, 0.6143, 0.6635, 1.0328, 0.9499, 0.6412, -1.0721, 0.8737, -1.0907, 0.8763, 0.8912, -1.0699, -1.0579, 0.5113, 0.6379, -1.1604, 0.5059, 0.9606, 1.0291, 0.7514, 0.8672, 0.7508, 0.9982, -0.9846, -1.0452, 0.938, -1.0694, 0.4534, 0.6177, 0.987, 0.6557, 0.4082, -0.7897, 0.8985, 0.8507, 0.6348, 1.2741, -1.0649, -0.7699, 1.1993, -0.9392, -1.3995, -1.1145, 0.5012, -0.2276, -0.8575, 1.863, -0.7261, 1.1205, -1.1653, 2.6689, -1.042, 0.3852, -0.436, -1.5648, 0.4107, 0.3205, -2.7582, 1.776, -0.5487, 2.7825, 0.9804, 0.0688, -1.9431, -2.5416, 0.1559], "marker": {"color": ["1", "0", "2", "1", "1", "0", "2", "2", "1", "2", "0", "0", "2", "1", "2", "1", "2", "0", "1", "0", "0", "2", "2", "2", "1", "1", "2", "1", "1", "0", "1", "2", "1", "1", "0", "2", "2", "0", "0", "1", "0", "0", "2", "1", "1", "0", "2", "2", "2", "1", "2", "0", "1", "1", "1", "2", "2", "0", "1", "0", "2", "2", "2", "1", "0", "1", "0", "1", "1", "0", "2", "2", "0", "0", "1", "1", "1", "0", "1", "1", "0", "2", "2", "0", "2", "2", "2", "2", "0", "2", "2", "1", "1", "0", "2", "2", "1", "2", "2", "1", "0", "2", "1", "1", "1", "1", "0", "1", "0", "2", "2", "0", "0", "1", "1", "2", "2", "2", "2", "1", "0", "2", "0", "1", "0", "0", "0", "1", "0", "2", "2", "2", "2", "2", "2", "0", "2", "1", "1", "1", "2", "2", "2", "0", "0", "2", "2", "2", "2", "2", "2", "0", "0", "1", "0", "0", "2", "0", "0", "0", "0", "0", "2", "2", "2", "2", "1", "0", "2", "2", "2", "0", "0", "2", "2", "1", "1", "1", "0", "2", "0", "2", "2", "0", "2", "2", "1", "1", "2", "1", "0", "2", "0", "0", "0", "2", "0", "2", "1", "2", "2", "0", "1", "0", "0", "1", "2", "2", "2", "2", "2", "1", "0", "1", "0", "2", "2", 
"2", "0", "0", "2", "2", "0", "2", "2", "2", "2", "0", "1", "1", "0", "0", "2", "2", "0", "0", "0", "2", "2", "0", "0", "2", "0", "2", "0", "2", "0", "1", "1", "1", "1", "1", "0", "1", "2", "1", "2", "2", "0", "2", "1", "2", "1", "0", "2", "0", "1", "0", "2", "2", "0", "2", "0", "2", "2", "2", "0", "0", "2", "1", "2", "0", "0", "2", "0", "0", "0", "1", "1", "2", "2", "1", "1", "2", "2", "0", "2", "2", "2", "0", "2", "0", "0", "1", "1", "0", "1", "2", "1", "2", "0", "0", "0", "2", "2", "2", "2", "2", "2", "2", "0", "0", "1", "0", "0", "1", "0", "2", "0", "2", "0", "2", "0", "1", "1", "1", "2", "0", "1", "1", "2", "2", "1", "1", "2", "1", "0", "1", "0", "0", "2", "1", "1", "2", "2", "2", "1", "2", "1", "0", "0", "1", "1", "1", "2", "0", "0", "0", "1", "2", "2", "2", "2", "0", "2", "0", "2", "0", "1", "0", "0", "2", "0", "2", "2", "2", "0", "2", "1", "1", "2", "2", "1", "2", "2", "2", "1", "2", "1", "0", "0", "2", "0", "0", "2", "3", "3", "2", "3", "1", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "3", "1", "3", "3", "3", "1", "3", "3", "3", "3", "3", "3", "1", "3", "3"]}, "name": "0", "legendgroup": "0", "showlegend": true, "mode": "markers"}, {"type": "scatter", "x": [-0.407403, -1.08878, 1.071304, 0.97797], "y": [0.278186, -0.810584, 0.811022, -0.672336], "mode": "markers", "marker": {"color": "#d6336c", "size": 12, "symbol": "x"}, "name": "聚类中心", "showlegend": true}]}K-Means 聚类结果,k=4。点根据其分配的聚类着色,聚类中心用 'x' 标记。注意离群点是如何被分配到最近的聚类的。K-Means 成功识别出主要群组,但它将每个点(包括我们添加的明显离群点)强制归入一个聚类。这是因为 K-Means 假定聚类是球形的,并将每个点分配给最近的聚类中心。应用 DBSCAN 聚类DBSCAN(基于密度的带噪声空间聚类应用)将紧密排列的点归为一类,将低密度区域中孤立的点标记为离群点。它不需要预先指定聚类数量,而是依赖两个参数:eps(两个样本之间,其中一个被视为另一个邻域内的最大距离)和 min_samples(一个点被视为核心点所需的邻域内的样本数量)。选择合适的 eps 和 min_samples 通常需要一些实验或专业知识。让我们尝试一些值。较小的 eps 或较大的 min_samples 将导致更多的点被归类为噪声。from sklearn.cluster import DBSCAN # 实例化并拟合 DBSCAN # 这些参数可能需要根据数据集密度进行调整 dbscan = DBSCAN(eps=0.3, min_samples=5) dbscan.fit(X_scaled) # 获取聚类分配(-1 表示噪声/离群点) df['DBSCAN Cluster'] = dbscan.labels_.astype(str) # 
转换为字符串以便使用离散颜色 # 可视化 DBSCAN 结果 fig_dbscan = px.scatter(df, x='Feature 1', y='Feature 2', color='DBSCAN Cluster', title=f'DBSCAN Clustering Results (eps={dbscan.eps}, min_samples={dbscan.min_samples})', color_discrete_map={"-1": "#adb5bd"}, # 噪声点使用灰色 category_orders={"DBSCAN Cluster": sorted(df['DBSCAN Cluster'].unique(), key=int)}, # 确保 -1 排在第一位 color_discrete_sequence=px.colors.qualitative.Pastel) # 实际聚类使用的颜色 # fig_dbscan.show(){"layout": {"title": {"text": "DBSCAN 聚类结果 (eps=0.3, min_samples=5)"}, "xaxis": {"title": {"text": "特征 1"}}, "yaxis": {"title": {"text": "特征 2"}}, "coloraxis": {"colorbar": {"title": {"text": "DBSCAN 聚类"}}}, "legend": {"title": {"text": "DBSCAN 聚类"}, "traceorder": "grouped"}, "color_discrete_map": {"-1": "#adb5bd"}, "color_discrete_sequence": ["#AEC7E8", "#FFBB78", "#98DF8A", "#FF9896"], "template": "plotly_white"}, "data": [{"type": "scatter", "x": [-0.1407, -1.0994, 1.0277, -1.0999, 0.5868, -0.9657, 1.3333, 0.9139, -0.5425, 1.0503, -0.9569, -0.8658, 1.4888, -0.5704, 1.2312, 0.5124, 0.7864, -1.294, -0.9789, -1.2406, -1.2164, 1.1359, 0.7019, 1.6568, -1.101, -0.2347, 1.0796, -0.1379, 0.3974, -1.0173, -1.0665, 0.871, 0.3669, -0.472, -1.1459, 0.9665, 0.9813, -1.0097, -0.7846, 0.5722, -1.3466, -1.0766, 0.9912, -0.5746, -1.184, -1.4592, 1.4559, 0.8112, 0.9221, 0.6401, 1.106, -0.853, 0.5223, -0.196, -1.0974, 1.1218, 0.8076, -1.3195, -0.4492, -1.4464, 1.0315, 1.2396, 0.9347, -1.0508, -1.2227, -0.4981, -1.2317, -0.178, -0.4956, -1.2049, 0.653, 1.3812, -1.2812, -0.9394, -0.3137, -0.026, 0.4793, -1.1917, 0.579, 0.5904, -1.4075, 1.2042, 1.3355, -1.4301, 1.1796, 0.9514, 0.9065, 1.0447, -0.9378, 1.2249, 0.8301, 0.3932, -0.1822, -1.1175, 1.1579, 0.8179, -0.2471, 0.954, 1.2973, -0.1876, -0.7474, 1.2337, 0.6645, 0.5167, 0.343, -0.4198, -1.3238, -0.4518, -0.8374, 0.8626, 1.2979, -0.9468, -0.966, 0.6049, -0.297, 0.7823, 0.8005, 1.2691, 1.2789, -0.4227, -1.1512, 1.1617, -1.0609, -0.4972, -1.0459, -1.1928, -1.2507, -0.404, -1.1426, 0.7135, 1.038, 1.1142, 
0.8662, 0.953, 0.875, -1.1291, 0.8807, -0.1804, 0.6456, -0.4304, 1.0115, 1.2328, 0.9183, -1.0549, -1.0544, 0.8762, 0.9617, 1.412, 0.9029, 1.3752, 1.0726, -1.1757, -1.2255, 0.6447, -1.0514, -0.8675, 0.724, -1.0342, -1.2793, -1.3291, -1.0629, -1.0499, 0.7652, 1.1709, 0.817, 0.8323, -0.627, -1.018, 1.0299, 1.2619, 0.9741, -1.1077, -0.9245, 1.3683, 0.8466, -0.3231, 0.5547, -0.2757, -0.786, 0.8479, -1.2604, 1.1134, 1.3738, -1.2446, 1.2693, 0.7628, -0.413, -0.3673, 1.1481, -0.5984, -0.857, 1.2557, -1.3063, -1.4615, -1.1838, 1.0693, -1.1197, 0.9167, -0.3066, 1.1313, 0.7927, -0.9761, 0.5715, -1.0618, -0.8806, -0.5839, 0.9319, 1.003, 0.8129, 1.068, 1.0767, 0.6772, -0.8908, -0.5365, -1.022, 1.1591, 0.8086, 1.3369, -0.9094, -1.1797, 0.9738, 1.099, -1.1366, 0.9422, 1.1731, 1.0436, 1.272, -1.2634, -0.5254, -0.5075, -0.8034, -0.9109, 1.0501, 0.9625, -1.0425, -0.8976, -1.2641, 1.2188, 1.0554, -0.9723, -1.2454, 0.8943, -1.3716, 0.9681, -0.963, 0.8296, -1.3439, -0.5657, -0.5068, -0.3491, 0.6947, -0.5227, -0.9843, 0.5515, 0.9467, 0.9095, 1.0409, -0.7745, 1.2774, -0.4679, 1.0725, -0.5888, -1.1504, 0.9305, -1.2354, 0.5948, -1.1913, 1.2305, 1.4638, -1.1734, 0.7548, -1.3545, 1.225, 1.0042, 1.2468, -1.2765, -0.9975, 1.1996, -0.3678, 1.0236, -1.1804, -1.1794, 1.0052, -0.8974, -1.0682, -1.2588, -0.5266, -0.3957, 0.9817, 1.0017, -0.5352, -0.4289, 1.2081, 1.1471, -1.0053, 1.1611, 1.0302, 0.871, -1.1139, 0.7779, -1.0842, -1.1987, -0.3755, -0.4483, -1.2899, -0.5117, 0.7213, -0.2189, 0.9803, -1.241, -1.0553, -0.7993, 1.058, 0.812, 0.737, 1.0485, 0.8861, 1.1864, 0.7884, -1.1194, -1.156, -0.5524, -1.223, -1.2573, 0.6275, -0.903, 0.8745, -0.8966, 1.3016, -0.9358, 0.7795, 0.559, 0.6593, 0.9051, -1.365, 0.8671, -0.3485, 1.1828, 0.9904, -0.4873, -0.4018, 1.0078, -0.3441, -0.8616, 0.5845, -1.0916, -1.068, 0.8667, -0.3337, 0.6409, 0.9494, 0.9515, 1.3517, -0.5303, 1.1751, -0.5874, -1.0161, -1.1064, -0.4131, 0.691, 0.6861, 0.9201, -0.8531, -1.1725, -1.0665, 0.7191, -1.3018, -1.1861, -0.563, 1.2947, 
0.9699, 1.1703, 1.3062, -1.2465, 0.8114, -1.1754, 1.177, -0.9338, 0.6174, -1.1542, -1.1445, 1.0532, -1.2332, 1.1821, 1.1137, 0.9939, -0.985, 1.0243, -0.4958, -0.3992, 0.9532, 1.1678, 0.6459, 1.2372, 1.1185, 1.3594, -0.4372, 0.8968, 0.6877, -1.1731, -1.1971, 0.9246, -1.3562, -1.0278, 0.8815, 1.7774, 0.201, 0.8197, -1.6969, -0.5767, 1.2943, 1.9177, -2.7636, -0.8424, -0.1145, -1.7681, -2.7093, 1.9856, -0.8059, 1.6636, 1.3533, -0.3615, 2.6857, -1.8161, 2.4043, 0.0361, 2.8059, -1.7882, -1.0587, -2.059, 1.6951, -0.7022, 2.4761, 0.3833, 2.0588], "y": [-0.8666, -1.3076, 1.1466, 0.4787, -0.8475, -1.2978, 0.6605, 0.8718, 0.714, 0.4256, -0.8196, -1.2232, 0.4811, 0.953, 0.4931, -1.3832, 0.7797, -1.331, 0.4578, -0.9236, -1.1063, 1.2174, 0.9948, 1.2829, 0.4216, 0.6066, 0.4783, -0.7977, -0.9061, -1.0119, 0.6711, 1.0054, 0.1427, 1.0733, -0.9633, 0.7437, 0.491, -0.9161, -1.2534, -1.1416, -1.1548, -1.1194, 0.8288, 0.7664, 0.5591, 0.3396, 0.8146, 0.9518, 0.7766, -0.8617, 0.9247, -0.8838, -1.2205, 0.9336, 0.3487, 0.9102, 0.5091, -1.0738, 1.0237, -0.9905, 1.0576, 0.6369, 0.6352, 0.4794, 0.5595, 0.8968, -0.7779, -0.9835, 1.2515, 0.5617, -0.9929, 0.9237, -1.1041, -0.8775, 0.8621, -0.6849, 0.6546, -1.0216, -1.1289, -0.9066, 0.2926, 0.9491, -0.9869, 0.9693, 0.6341, 0.6737, 1.053, 0.6279, -1.1446, 0.6207, 0.7901, -1.227, 0.7906, -1.0079, 0.8611, 0.9608, 0.8524, 0.6033, 0.9035, 0.6524, -1.1019, 0.588, 0.662, -1.1907, -0.9544, 0.9727, -1.2646, 1.1534, 0.8496, 0.4639, -0.838, 1.0923, 0.823, -0.9584, -0.8054, -1.0062, -1.0041, 0.7601, 1.0145, 1.1052, -1.1101, 0.8728, 0.3247, -1.106, 0.5216, 0.8813, 0.8996, 0.8193, 0.457, -1.0145, 0.4635, 0.6285, 0.4733, 0.9403, 0.6084, 1.1098, -0.9653, -1.0408, 1.0144, 0.6763, 0.5978, -1.2139, 0.5135, -0.8991, -0.9251, -0.9663, -1.0803, 1.0567, 0.7046, 0.658, 1.1024, 0.9846, 0.5846, 0.4874, 0.5805, 0.6397, 0.6516, 0.8132, -0.9473, -0.784, -0.8091, 0.6908, 0.5801, -1.2028, -0.8661, -1.2063, 0.7175, 0.9038, -1.1497, 0.5951, -0.9887, 0.6929, -0.975, -0.8862, 
-0.911, 0.7555, 0.7327, 0.5639, 0.8366, -0.7963, -0.9452, 0.8443, 0.592, 0.6781, 0.5405, 0.952, -1.0689, 0.7107, -0.9613, 0.844, -1.0717, 0.8914, 1.1217, -0.8121, 0.8773, -0.8166, 0.836, 0.9099, 0.8133, -1.1503, 0.9635, 0.6772, -1.0588, 0.8235, -0.9308, -0.745, 1.0284, -1.2506, 0.4704, -0.974, 1.0008, 1.0714, 1.0788, 1.2273, 0.8939, -1.0659, 0.6827, 1.0467, 0.797, 0.6862, 0.7885, -0.8131, 0.6264, 0.6088, -1.0091, -1.2596, 0.536, 0.7983, -1.1136, 0.826, 0.7737, 0.9269, 0.5285, -1.0536, -1.069, -1.1664, 1.1098, 1.0448, -0.9114, 0.5947, -1.1356, -0.9871, -0.8602, -1.2562, -1.0156, -0.9124, 0.8714, 0.6371, -0.8954, 0.6962, 0.8474, -1.0044, -1.1812, -0.8671, -1.2392, 0.784, 1.0565, 0.9457, 1.2709, -1.0251, 0.534, -0.8715, 0.9925, 1.2658, -1.152, 0.7166, 0.4394, -1.0566, -1.1288, 0.6154, 0.6149, -0.9808, -1.0642, -1.1407, 0.6211, 0.5483, 0.752, -1.0223, 0.5054, 0.7415, -0.8241, -1.1274, 0.9468, 1.0843, 0.906, 0.9415, 0.4671, 0.6138, 0.5739, -1.1624, -0.8083, 1.0958, 0.7062, 0.8619, 0.7824, -1.1197, 1.0712, -0.8046, 0.4209, -1.063, -1.0467, 1.0466, 0.5681, 0.8338, 0.8469, 1.0606, -0.8658, 0.5806, -0.8557, -1.1336, 1.1908, 0.6739, 0.8456, 1.0095, -1.2388, 1.0619, 0.968, 0.9672, -1.0416, -1.1241, 0.7364, -0.9385, 0.759, -1.1857, 0.9836, 0.7427, -0.8048, -1.1141, 0.6893, -1.0811, 0.4271, 0.9466, -1.1202, 0.5478, 0.8205, 0.641, 0.8387, -0.8243, 0.5748, 0.6667, 0.8304, 0.8808, -1.0283, 1.0995, -0.9389, 0.8828, -0.8715, -1.0821, 0.494, 0.674, 0.8062, 1.0601, 0.9894, 0.9091, 0.6143, 0.6635, 1.0328, 0.9499, 0.6412, -1.0721, 0.8737, -1.0907, 0.8763, 0.8912, -1.0699, -1.0579, 0.5113, 0.6379, -1.1604, 0.5059, 0.9606, 1.0291, 0.7514, 0.8672, 0.7508, 0.9982, -0.9846, -1.0452, 0.938, -1.0694, 0.4534, 0.6177, 0.987, 0.6557, 0.4082, -0.7897, 0.8985, 0.8507, 0.6348, 1.2741, -1.0649, -0.7699, 1.1993, -0.9392, -1.3995, -1.1145, 0.5012, -0.2276, -0.8575, 1.863, -0.7261, 1.1205, -1.1653, 2.6689, -1.042, 0.3852, -0.436, -1.5648, 0.4107, 0.3205, -2.7582, 1.776, -0.5487, 2.7825, 0.9804, 0.0688, 
-1.9431, -2.5416, 0.1559], "marker": {"color": ["1", "0", "2", "1", "1", "0", "2", "2", "1", "2", "0", "0", "2", "1", "2", "1", "2", "0", "1", "0", "0", "2", "2", "2", "1", "1", "2", "1", "1", "0", "1", "2", "1", "1", "0", "2", "2", "0", "0", "1", "0", "0", "2", "1", "1", "0", "2", "2", "2", "1", "2", "0", "1", "1", "1", "2", "2", "0", "1", "0", "2", "2", "2", "1", "0", "1", "0", "1", "1", "0", "2", "2", "0", "0", "1", "1", "1", "0", "1", "1", "0", "2", "2", "0", "2", "2", "2", "2", "0", "2", "2", "1", "1", "0", "2", "2", "1", "2", "2", "1", "0", "2", "1", "1", "1", "1", "0", "1", "0", "2", "2", "0", "0", "1", "1", "2", "2", "2", "2", "1", "0", "2", "0", "1", "0", "0", "0", "1", "0", "2", "2", "2", "2", "2", "2", "0", "2", "1", "1", "1", "2", "2", "2", "0", "0", "2", "2", "2", "2", "2", "2", "0", "0", "1", "0", "0", "2", "0", "0", "0", "0", "0", "2", "2", "2", "2", "1", "0", "2", "2", "2", "0", "0", "2", "2", "1", "1", "1", "0", "2", "0", "2", "2", "0", "2", "2", "1", "1", "2", "1", "0", "2", "0", "0", "0", "2", "0", "2", "1", "2", "2", "0", "1", "0", "0", "1", "2", "2", "2", "2", "2", "1", "0", "1", "0", "2", "2", "2", "0", "0", "2", "2", "0", "2", "2", "2", "2", "0", "1", "1", "0", "0", "2", "2", "0", "0", "0", "2", "2", "0", "0", "2", "0", "2", "0", "2", "0", "1", "1", "1", "1", "1", "0", "1", "2", "1", "2", "2", "0", "2", "1", "2", "1", "0", "2", "0", "1", "0", "2", "2", "0", "2", "0", "2", "2", "2", "0", "0", "2", "1", "2", "0", "0", "2", "0", "0", "0", "1", "1", "2", "2", "1", "1", "2", "2", "0", "2", "2", "2", "0", "2", "0", "0", "1", "1", "0", "1", "2", "1", "2", "0", "0", "0", "2", "2", "2", "2", "2", "2", "2", "0", "0", "1", "0", "0", "1", "0", "2", "0", "2", "0", "2", "0", "1", "1", "1", "2", "0", "1", "1", "2", "2", "1", "1", "2", "1", "0", "1", "0", "0", "2", "1", "1", "2", "2", "2", "1", "2", "1", "0", "0", "1", "1", "1", "2", "0", "0", "0", "1", "2", "2", "2", "2", "0", "2", "0", "2", "0", "1", "0", "0", "2", "0", "2", "2", "2", "0", "2", "1", "1", 
"2", "2", "1", "2", "2", "2", "1", "2", "1", "0", "0", "2", "0", "0", "2", "-1", "-1", "-1", "-1", "-1", "-1", "-1", "-1", "-1", "-1", "-1", "-1", "-1", "-1", "-1", "-1", "-1", "-1", "-1", "-1", "-1", "-1", "-1", "-1", "-1", "-1", "-1", "-1", "-1", "-1"]}, "name": "-1", "legendgroup": "-1", "showlegend": true, "mode": "markers"}]}DBSCAN 聚类结果。标记为 '-1'(灰色)的点被识别为噪声/离群点,因为它们根据选择的 eps 和 min_samples 不属于任何密集区域。将此与 K-Means 图进行比较。DBSCAN 成功识别出四个主要聚类,而且值得一提的是,它将大多数模拟离群点(以及可能在主要聚类边缘的一些点)标记为噪声(聚类标签 -1)。这种发现噪声点的能力是基于密度的聚类在处理含有离群值的数据集时的一个重要优势。基本异常检测尽管 DBSCAN 本身就能识别可被视为异常的噪声点,但其他算法是专门为异常检测设计的。让我们尝试使用孤立森林算法。它通过随机划分数据来逐一孤立各个观测值;异常点通常只需更少的划分就能与其余数据分离,因此更容易被孤立出来。from sklearn.ensemble import IsolationForest # 实例化并拟合孤立森林 # 'contamination' 是预期离群点的比例,可设置为 'auto' 或特定值 # 让我们根据添加的噪声点数量(30 / 430)估算,约为 0.07 iso_forest = IsolationForest(contamination=0.07, random_state=42) iso_forest.fit(X_scaled) # 预测异常(-1 为异常,1 为正常) df['Anomaly'] = iso_forest.predict(X_scaled) df['Anomaly'] = df['Anomaly'].map({1: '正常', -1: '异常'}) # 映射到可读的标签 # 可视化异常检测结果 fig_anomaly = px.scatter(df, x='Feature 1', y='Feature 2', color='Anomaly', title='使用孤立森林进行异常检测', color_discrete_map={'正常': '#1f77b4', '异常': '#d62728'}, # 标准蓝色,醒目红色 category_orders={"Anomaly": ["正常", "异常"]}) # 确保图例顺序一致 # fig_anomaly.show(){"layout": {"title": {"text": "使用孤立森林进行异常检测"}, "xaxis": {"title": {"text": "特征 1"}}, "yaxis": {"title": {"text": "特征 2"}}, "coloraxis": {"colorbar": {"title": {"text": "异常"}}}, "legend": {"title": {"text": "异常"}, "traceorder": "grouped"}, "color_discrete_map": {"正常": "#228be6", "异常": "#fa5252"}, "template": "plotly_white"}, "data": [{"type": "scatter", "x": [-0.1407, -1.0994, 1.0277, -1.0999, 0.5868, -0.9657, 1.3333, 0.9139, -0.5425, 1.0503, -0.9569, -0.8658, 1.4888, -0.5704, 1.2312, 0.5124, 0.7864, -1.294, -0.9789, -1.2406, -1.2164, 1.1359, 0.7019, 1.6568, -1.101, -0.2347, 1.0796, -0.1379, 0.3974, -1.0173, -1.0665, 0.871, 0.3669, -0.472, -1.1459, 0.9665, 0.9813, -1.0097, -0.7846, 0.5722, -1.3466, -1.0766, 0.9912, -0.5746, 
-1.184, -1.4592, 1.4559, 0.8112, 0.9221, 0.6401, 1.106, -0.853, 0.5223, -0.196, -1.0974, 1.1218, 0.8076, -1.3195, -0.4492, -1.4464, 1.0315, 1.2396, 0.9347, -1.0508, -1.2227, -0.4981, -1.2317, -0.178, -0.4956, -1.2049, 0.653, 1.3812, -1.2812, -0.9394, -0.3137, -0.026, 0.4793, -1.1917, 0.579, 0.5904, -1.4075, 1.2042, 1.3355, -1.4301, 1.1796, 0.9514, 0.9065, 1.0447, -0.9378, 1.2249, 0.8301, 0.3932, -0.1822, -1.1175, 1.1579, 0.8179, -0.2471, 0.954, 1.2973, -0.1876, -0.7474, 1.2337, 0.6645, 0.5167, 0.343, -0.4198, -1.3238, -0.4518, -0.8374, 0.8626, 1.2979, -0.9468, -0.966, 0.6049, -0.297, 0.7823, 0.8005, 1.2691, 1.2789, -0.4227, -1.1512, 1.1617, -1.0609, -0.4972, -1.0459, -1.1928, -1.2507, -0.404, -1.1426, 0.7135, 1.038, 1.1142, 0.8662, 0.953, 0.875, -1.1291, 0.8807, -0.1804, 0.6456, -0.4304, 1.0115, 1.2328, 0.9183, -1.0549, -1.0544, 0.8762, 0.9617, 1.412, 0.9029, 1.3752, 1.0726, -1.1757, -1.2255, 0.6447, -1.0514, -0.8675, 0.724, -1.0342, -1.2793, -1.3291, -1.0629, -1.0499, 0.7652, 1.1709, 0.817, 0.8323, -0.627, -1.018, 1.0299, 1.2619, 0.9741, -1.1077, -0.9245, 1.3683, 0.8466, -0.3231, 0.5547, -0.2757, -0.786, 0.8479, -1.2604, 1.1134, 1.3738, -1.2446, 1.2693, 0.7628, -0.413, -0.3673, 1.1481, -0.5984, -0.857, 1.2557, -1.3063, -1.4615, -1.1838, 1.0693, -1.1197, 0.9167, -0.3066, 1.1313, 0.7927, -0.9761, 0.5715, -1.0618, -0.8806, -0.5839, 0.9319, 1.003, 0.8129, 1.068, 1.0767, 0.6772, -0.8908, -0.5365, -1.022, 1.1591, 0.8086, 1.3369, -0.9094, -1.1797, 0.9738, 1.099, -1.1366, 0.9422, 1.1731, 1.0436, 1.272, -1.2634, -0.5254, -0.5075, -0.8034, -0.9109, 1.0501, 0.9625, -1.0425, -0.8976, -1.2641, 1.2188, 1.0554, -0.9723, -1.2454, 0.8943, -1.3716, 0.9681, -0.963, 0.8296, -1.3439, -0.5657, -0.5068, -0.3491, 0.6947, -0.5227, -0.9843, 0.5515, 0.9467, 0.9095, 1.0409, -0.7745, 1.2774, -0.4679, 1.0725, -0.5888, -1.1504, 0.9305, -1.2354, 0.5948, -1.1913, 1.2305, 1.4638, -1.1734, 0.7548, -1.3545, 1.225, 1.0042, 1.2468, -1.2765, -0.9975, 1.1996, -0.3678, 1.0236, -1.1804, -1.1794, 1.0052, 
-0.8974, -1.0682, -1.2588, -0.5266, -0.3957, 0.9817, 1.0017, -0.5352, -0.4289, 1.2081, 1.1471, -1.0053, 1.1611, 1.0302, 0.871, -1.1139, 0.7779, -1.0842, -1.1987, -0.3755, -0.4483, -1.2899, -0.5117, 0.7213, -0.2189, 0.9803, -1.241, -1.0553, -0.7993, 1.058, 0.812, 0.737, 1.0485, 0.8861, 1.1864, 0.7884, -1.1194, -1.156, -0.5524, -1.223, -1.2573, 0.6275, -0.903, 0.8745, -0.8966, 1.3016, -0.9358, 0.7795, 0.559, 0.6593, 0.9051, -1.365, 0.8671, -0.3485, 1.1828, 0.9904, -0.4873, -0.4018, 1.0078, -0.3441, -0.8616, 0.5845, -1.0916, -1.068, 0.8667, -0.3337, 0.6409, 0.9494, 0.9515, 1.3517, -0.5303, 1.1751, -0.5874, -1.0161, -1.1064, -0.4131, 0.691, 0.6861, 0.9201, -0.8531, -1.1725, -1.0665, 0.7191, -1.3018, -1.1861, -0.563, 1.2947, 0.9699, 1.1703, 1.3062, -1.2465, 0.8114, -1.1754, 1.177, -0.9338, 0.6174, -1.1542, -1.1445, 1.0532, -1.2332, 1.1821, 1.1137, 0.9939, -0.985, 1.0243, -0.4958, -0.3992, 0.9532, 1.1678, 0.6459, 1.2372, 1.1185, 1.3594, -0.4372, 0.8968, 0.6877, -1.1731, -1.1971, 0.9246, -1.3562, -1.0278, 0.8815, 1.7774, 0.201, 0.8197, -1.6969, -0.5767, 1.2943, 1.9177, -2.7636, -0.8424, -0.1145, -1.7681, -2.7093, 1.9856, -0.8059, 1.6636, 1.3533, -0.3615, 2.6857, -1.8161, 2.4043, 0.0361, 2.8059, -1.7882, -1.0587, -2.059, 1.6951, -0.7022, 2.4761, 0.3833, 2.0588], "marker": {"color": ["正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", 
"正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", 
"正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "正常", "异常", "异常", "异常", "异常", "异常", "异常", "异常", "异常", "异常", "异常", "异常", "异常", "异常", "异常", "异常", "异常", "正常", "异常", "异常", "异常", "正常", "异常", "异常", "异常", "异常", "异常", "异常", "异常", "异常", "异常"]}, "name": "正常", "legendgroup": "正常", "showlegend": true, "mode": "markers"}]}孤立森林结果,突出显示被识别为异常的点(红色)。孤立森林识别出的异常点中,有许多与 DBSCAN 标记的噪声点相同。然而,由于两种算法的原理和参数(如 contamination 系数)不同,两者得到的具体点集可能并不完全一致。孤立森林是专门为发现离群点而设计的,而 DBSCAN 则是在识别密集区域的过程中附带发现它们。根据具体目标,其中一种算法可能比另一种更合适。本次实践演示了如何应用 K-Means 和 DBSCAN 进行聚类,以及如何使用孤立森林进行异常检测。你看到了 K-Means 会将所有点都分配到某个聚类中,而 DBSCAN 能够识别噪声。孤立森林则提供了一种专门用于发现离群点的方法。要在特定数据集和分析目标上得到预期结果,通常需要尝试调整参数(K-Means 的 k,DBSCAN 的 eps 和 min_samples,孤立森林的 contamination)。