2015年12月22日火曜日

[C#][パターンマッチング]画像データをクラスタリングして一番共通点を含むデータを選別する

クラスタリングしてその中で最も共通した要素を含む画像を1つだけ選ぶアルゴリズムです。

画像の判断基準には、4分木で分割した画像の各領域における平均と偏差の2つを用います。


 /// <summary>
 /// One cluster: a list of <see cref="ClusterModel"/> members tagged with an identifier.
 /// </summary>
 public class ClusterCollection : List<ClusterModel>
 {
  /// <summary>
  /// Identifier of this cluster.
  /// </summary>
  public int Id { get; set; }

  /// <summary>
  /// Creates an empty cluster carrying the given identifier.
  /// </summary>
  /// <param name="id">Identifier stored in <see cref="Id"/>.</param>
  public ClusterCollection(int id)
  {
   Id = id;
  }

  /// <summary>
  /// Returns a single model standing in for the whole cluster.
  /// </summary>
  /// <returns>
  /// A new model built from all members when the cluster holds more than one,
  /// otherwise the first member itself. Indexing the first member fails when
  /// the cluster is empty.
  /// </returns>
  public ClusterModel ToModel()
  {
   return (Count > 1) ? new ClusterModel(ToArray()) : this[0];
  }
 }
 /// <summary>
 /// Holds the current set of clusters and merges them by pairwise similarity.
 /// </summary>
 public class ClusterGroup
 {
  /// <summary>
  /// Clusters currently alive. Starts with one cluster per input model.
  /// </summary>
  public List<ClusterCollection> Groups { get; } = new List<ClusterCollection>();

  /// <summary>
  /// Builds the initial state: every model becomes its own single-member cluster.
  /// Identifiers are assigned sequentially starting at 1.
  /// </summary>
  /// <param name="models">Models to cluster.</param>
  public ClusterGroup(ClusterModel[] models)
  {
   var id = 1;
   foreach (var model in models)
   {
    Groups.Add(new ClusterCollection(id++) { model });
   }
  }

  /// <summary>
  /// Performs one clustering pass: finds the best-scoring link for every group,
  /// keeps the links whose score is at least the average of those best scores,
  /// and merges the two groups of each kept link.
  /// </summary>
  public void Clustering()
  {
   // With fewer than two groups there is nothing to merge. The only link a
   // single group can form is with itself, and merging that link would remove
   // the group from Groups.
   if (Groups.Count < 2)
   {
    return;
   }

   // Best link for every group.
   var bestLinks = new ClusterLinkCollection();
   foreach (var group in Groups)
   {
    bestLinks.Add(new ClusterLinkCollection(Groups, group).GetAnswer());
   }

   // Keep only links that score at least the average best score.
   // NOTE(review): the original also computed (max + average) / 2 but never
   // used it; the average remains the effective threshold.
   var average = bestLinks.Select(link => link.Distance).Average();
   var accepted = bestLinks.Where(link => average <= link.Distance).ToArray();

   // Merge.
   foreach (var link in accepted)
   {
    // A self-link must never be merged: the group would be removed from Groups.
    if (ReferenceEquals(link.Index1, link.Index2))
    {
     continue;
    }

    // Skip links whose endpoints were already absorbed by an earlier merge.
    if (!Groups.Any(g => g.Id == link.Index1.Id))
    {
     continue;
    }
    if (!Groups.Any(g => g.Id == link.Index2.Id))
    {
     continue;
    }

    link.Index1.AddRange(link.Index2);
    Groups.Remove(link.Index2);
   }
  }

  /// <summary>
  /// Picks the most plausible group, i.e. the one with the most members.
  /// </summary>
  /// <returns>The first group in <see cref="Groups"/> having the largest member count.</returns>
  public ClusterCollection GetMostPopulure()
  {
   var largest = Groups.Max(g => g.Count);
   return Groups.First(g => g.Count == largest);
  }
 }
 /// <summary>
 /// A list of links between clusters.
 /// </summary>
 public class ClusterLinkCollection : List<ClusterLinkModel>
 {
  /// <summary>
  /// Creates an empty link list.
  /// </summary>
  public ClusterLinkCollection()
  {
  }

  /// <summary>
  /// Creates one link from every cluster in <paramref name="collection"/> to
  /// <paramref name="model"/>. Each listed cluster becomes the link's first
  /// endpoint and <paramref name="model"/> the second.
  /// </summary>
  /// <param name="collection">Clusters to link from.</param>
  /// <param name="model">Cluster every link points to.</param>
  public ClusterLinkCollection(List<ClusterCollection> collection, ClusterCollection model)
  {
   AddRange(collection.Select(other => new ClusterLinkModel(other, model)));
  }

  /// <summary>
  /// Returns the link with the highest score.
  /// </summary>
  /// <returns>The first link whose <c>Distance</c> equals the maximum over the list.</returns>
  public ClusterLinkModel GetAnswer()
  {
   var best = this.Max(link => link.Distance);
   return this.First(link => link.Distance == best);
  }

  /// <summary>
  /// Counts the links that have the given cluster as either endpoint.
  /// </summary>
  /// <param name="collection">Cluster to look for.</param>
  /// <returns>Number of links touching <paramref name="collection"/>.</returns>
  public int GetCount(ClusterCollection collection)
  {
   var total = 0;
   foreach (var link in this)
   {
    if (link.Index1 == collection || link.Index2 == collection)
    {
     total++;
    }
   }
   return total;
  }

  /// <summary>
  /// Lists the clusters linked to the given cluster, without duplicates,
  /// in the order their links appear.
  /// </summary>
  /// <param name="collection">Cluster whose neighbours are wanted.</param>
  /// <returns>The opposite endpoints of all links touching <paramref name="collection"/>.</returns>
  public List<ClusterCollection> ContainData(ClusterCollection collection)
  {
   var neighbours = new List<ClusterCollection>();

   foreach (var link in this)
   {
    // Pick the endpoint opposite to the requested cluster; the first
    // endpoint is checked first, exactly as before.
    ClusterCollection other;
    if (link.Index1 == collection)
    {
     other = link.Index2;
    }
    else if (link.Index2 == collection)
    {
     other = link.Index1;
    }
    else
    {
     continue;
    }

    if (!neighbours.Contains(other))
    {
     neighbours.Add(other);
    }
   }

   return neighbours;
  }
 }
 /// <summary>
 /// A link between two clusters together with their match score.
 /// </summary>
 public class ClusterLinkModel
 {
  /// <summary>
  /// First endpoint of the link.
  /// </summary>
  public ClusterCollection Index1 { get; }

  /// <summary>
  /// Second endpoint of the link.
  /// </summary>
  public ClusterCollection Index2 { get; }

  /// <summary>
  /// Match score between the two endpoints as returned by
  /// <c>ClusterModel.Matching</c>; fixed at 0.0 when both endpoints are the
  /// same cluster.
  /// </summary>
  public double Distance { get; }

  /// <summary>
  /// Links two clusters and computes their match score.
  /// </summary>
  /// <param name="model1">First endpoint.</param>
  /// <param name="model2">Second endpoint.</param>
  public ClusterLinkModel(ClusterCollection model1, ClusterCollection model2)
  {
   Index1 = model1;
   Index2 = model2;

   // A self-link always scores 0.0, so the representative models are only
   // built when the endpoints differ. Previously both were built and then
   // discarded for self-links.
   Distance = (model1 == model2)
    ? 0.0
    : model1.ToModel().Matching(model2.ToModel());
  }
 }
 /// <summary>
 /// Model class.
 /// Holds a data series plus a pattern vector derived from it and returns a
 /// match rate against another model based on those patterns.
 /// </summary>
 public class ClusterModel
 {
  /// <summary>
  /// Data series of this model.
  /// </summary>
  public double[] Data { get; }

  /// <summary>
  /// Pattern vector: for each of the <c>split_count</c> segments, the
  /// segment average followed by the segment's <c>Sigma()</c> value.
  /// </summary>
  public double[] Patterns { get; }

  /// <summary>
  /// Number of entries in <see cref="Patterns"/>.
  /// </summary>
  public int Count => Patterns.Length;

  // Number of equal-length segments the series is split into.
  const int split_count = 8;

  /// <summary>
  /// X position used when sampling the bitmap (analysis area X plus the given offset).
  /// </summary>
  public int KeisokuX { get; }

  /// <summary>
  /// Average RGB of the sampled pixels whose data value is at or above the series average.
  /// </summary>
  public LRGB Color { get; set; }

  /// <summary>
  /// Transparency measure: weighted sum of absolute sample-to-sample changes, times 10.
  /// </summary>
  public int Touka { get; set; }

  /// <summary>
  /// True when the saturation measure does not exceed the model's <c>Threshold0</c>
  /// (tube present/absent flag, per the original comment).
  /// </summary>
  public bool Empty { get; set; }

  /// <summary>
  /// True when the first data value is greater than the last one
  /// (bright at the top, dark at the bottom, per the original comment).
  /// </summary>
  public bool Stable { get; set; }

  /// <summary>
  /// Saturation measure derived from the per-pixel saturation range.
  /// </summary>
  public int Saturation { get; set; }

  /// <summary>
  /// Image rendered from <see cref="Data"/>; rendered anew on every access.
  /// </summary>
  public Bitmap ImageIn => Data.DrawImage();

  /// <summary>
  /// Builds a model from one raw data series and analyses the matching bitmap column.
  /// </summary>
  /// <param name="x">X offset inside the analysis area.</param>
  /// <param name="data">Raw data series.</param>
  /// <param name="bmp">Bitmap the pixel colours are sampled from.</param>
  /// <param name="model">Analysis settings (area, gain, threshold).</param>
  public ClusterModel(int x, double[] data, Bitmap bmp, AnalyzeModel model)
  {
   KeisokuX = model.AnalyzeArea.X + x;
   Data = data.Gaussian();
   Data.Normalize();

   // Build patterns.
   // NOTE(review): patterns are taken from the raw input, whereas the
   // merging constructor takes them from Data - confirm this is intended.
   Patterns = BuildPatterns(data);

   // Analyse.
   Analyze(KeisokuX, bmp, model);
  }

  /// <summary>
  /// Samples the bitmap column at <paramref name="ix"/> alongside the data
  /// series and fills <see cref="Color"/>, <see cref="Touka"/>,
  /// <see cref="Saturation"/>, <see cref="Stable"/> and <see cref="Empty"/>.
  /// </summary>
  /// <param name="ix">X coordinate of the sampled column.</param>
  /// <param name="bmp">Bitmap the pixel colours are sampled from.</param>
  /// <param name="model">Analysis settings (area, gain, threshold).</param>
  void Analyze(int ix, Bitmap bmp, AnalyzeModel model)
  {
   var len = Data.Length;
   var gain = model.Gain;
   var sum = 0.0;
   var weight = len * gain;
   var r = 0;
   var g = 0;
   var b = 0;
   var cnt = 0;
   var iy = model.AnalyzeArea.Y;
   var min = 99999999.0;
   var max = 0.0;
   var old = Data[0];
   // NOTE(review): these two results are never read. The calls are kept
   // because Bibun() is not visible here; remove them once it is confirmed
   // to be side-effect free.
   var bibun = Data.Bibun();
   var henbibun = bibun.Bibun();
   var ave = Data.Average();

   // The two samples at each end of the series are skipped.
   for (var idx = 2; idx < len - 2; idx++)
   {
    var dat = Data[idx];
    var col = bmp.GetPixel(ix, iy);

    // Accumulate the colour of samples at or above the average (the peaks).
    if (ave <= dat)
    {
     r += col.R;
     g += col.G;
     b += col.B;
     cnt++;
    }

    // Accumulate the change from the previous sample (transparency);
    // the weight decreases by the gain on every step.
    var tmp = Math.Abs(dat - old);
    sum += tmp * weight;

    // Per-pixel saturation: (max - min) / max of the RGB channels, 1 for black.
    double _min = Math.Min(col.R, Math.Min(col.G, col.B));
    double _max = Math.Max(col.R, Math.Max(col.G, col.B));
    var _val = (0 == _max) ? 1 : ((_max - _min) / _max);
    min = Math.Min(min, _val);
    max = Math.Max(max, _val);

    old = dat;
    weight -= gain;
    iy++;
   }

   // Average the colour only when at least one sample qualified; dividing
   // by a zero count would throw DivideByZeroException (e.g. for very short series).
   if (0 < cnt)
   {
    r /= cnt;
    g /= cnt;
    b /= cnt;
   }

   Color = new LRGB(r, g, b);                                                          // RGB value
   Touka = (int)(sum * 10);                                                            // transparency
   Saturation = (0.0 == max) ? 0 : (int)((1 - ((max - min) / (min + max))) * 100);     // saturation
   Stable = Data[0] > Data[len - 1];                                                   // bright at the top, dark at the bottom
   Empty = (model.Threshold0 >= Saturation);                                           // tube present/absent
  }

  /// <summary>
  /// Builds a model representing several models at once: their data series
  /// are combined with <c>Synthesis()</c> and the patterns are derived from
  /// the combined series. Only <see cref="Data"/> and <see cref="Patterns"/>
  /// are set; no bitmap analysis is performed.
  /// </summary>
  /// <param name="models">Models to combine.</param>
  public ClusterModel(ClusterModel[] models)
  {
   Data = models.Select(_ => _.Data).ToArray().Synthesis();
   Patterns = BuildPatterns(Data);
  }

  /// <summary>
  /// Splits <paramref name="source"/> into <c>split_count</c> equal-length
  /// segments and records each segment's average and <c>Sigma()</c> value.
  /// Elements beyond <c>split_count * (length / split_count)</c> are ignored.
  /// </summary>
  /// <param name="source">Series to summarise.</param>
  /// <returns>Array of <c>split_count * 2</c> values: average, sigma, average, sigma, ...</returns>
  static double[] BuildPatterns(double[] source)
  {
   var spt = source.Length / split_count;
   var patterns = new double[split_count * 2];
   for (var idx = 0; idx < split_count; idx++)
   {
    // Materialise the segment once and reuse it for both statistics.
    var segment = source.Skip(idx * spt).Take(spt).ToArray();
    patterns[idx * 2 + 0] = segment.Average();
    patterns[idx * 2 + 1] = segment.Sigma();
   }
   return patterns;
  }

  /// <summary>
  /// Cosine similarity between this model's patterns and another model's patterns.
  /// </summary>
  /// <param name="model">Model to compare against.</param>
  /// <returns>
  /// The cosine similarity, or 0.0 when either pattern vector is all zeros
  /// (the similarity is undefined there and would otherwise come out as NaN).
  /// </returns>
  public double Matching(ClusterModel model)
  {
   var sum = 0.0;
   var v1 = 0.0;
   var v2 = 0.0;
   for (var stage = 0; stage < Count; stage++)
   {
    var val1 = Patterns[stage];
    var val2 = model.Patterns[stage];
    sum += val1 * val2;
    v1 += val1 * val1;
    v2 += val2 * val2;
   }

   var denominator = Math.Sqrt(v1 * v2);

   // Exact zero check on purpose: it only guards the 0/0 case of a zero vector.
   if (denominator == 0.0)
   {
    return 0.0;
   }

   return sum / denominator;
  }
 }

0 件のコメント:

Androider