A platform for high-performance distributed tool and library development written in C++. It can be deployed in two different cluster modes: standalone or distributed. API for v0.5.0, released on June 13, 2018.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
KMeansDoubleVector.h
Go to the documentation of this file.
1 /*****************************************************************************
2  * *
3  * Copyright 2018 Rice University *
4  * *
5  * Licensed under the Apache License, Version 2.0 (the "License"); *
6  * you may not use this file except in compliance with the License. *
7  * You may obtain a copy of the License at *
8  * *
9  * http://www.apache.org/licenses/LICENSE-2.0 *
10  * *
11  * Unless required by applicable law or agreed to in writing, software *
12  * distributed under the License is distributed on an "AS IS" BASIS, *
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
14  * See the License for the specific language governing permissions and *
15  * limitations under the License. *
16  * *
17  *****************************************************************************/
18 #ifndef KMEANS_DOUBLE_VECTOR_H
19 #define KMEANS_DOUBLE_VECTOR_H
20 
21 
22 #include "Object.h"
23 #include "Handle.h"
24 #include "PDBVector.h"
25 #include "Configuration.h"
26 #include <math.h>
27 // PRELOAD %KMeansDoubleVector%
28 
29 
/* Tolerance constant used in the precision bound of the fast squared-distance
 * computation (it appears in both the numerator and the denominator there).
 * NOTE(review): the value matches double-precision machine epsilon
 * (2^-52 ~= 2.22e-16) -- confirm that this is the intent.
 * The #ifndef guard lets a build supply its own value before this header. */
30 #ifndef KMEANS_EPSILON
31 #define KMEANS_EPSILON 2.22045e-16
32 #endif
33 
/* Compile-time length of every KMeansDoubleVector: it sizes the rawData array
 * and bounds every element loop in the class. Overridable in the same way. */
34 #ifndef NUM_KMEANS_DIMENSIONS
35 #define NUM_KMEANS_DIMENSIONS 1000
36 #endif
37 
38 
39 /* This class implements a double vector based on a native array */
40 namespace pdb {
41 
42 class KMeansDoubleVector : public Object {
43 
44 public:
46  double norm = -1;
47 
48 public:
50 
51 
53 
54  void setValues(std::vector<double> dataToMe) {
55  for (int i = 0; i < NUM_KMEANS_DIMENSIONS; i++) {
56  rawData[i] = dataToMe[i];
57  }
58  }
59 
60  size_t getSize() {
61  return NUM_KMEANS_DIMENSIONS;
62  }
63 
64 
65  double* getRawData() {
66  return rawData;
67  }
68 
69  double getDouble(int i) {
70  return rawData[i];
71  }
72 
73  void setDouble(int i, double val) {
74  rawData[i] = val;
75  }
76 
77  /* Compute the 2-norm */
78  inline double getNorm2() {
79  if (norm < 0) {
80  norm = 0;
81  for (int i = 0; i < NUM_KMEANS_DIMENSIONS; i++) {
82  norm += rawData[i] * rawData[i];
83  }
84  norm = sqrt(norm);
85  }
86  return norm;
87  }
88 
89  /* Dot product */
90  inline double dot(KMeansDoubleVector& other) {
91  double* otherRawData = other.getRawData();
92  double dotSum = 0;
93  for (size_t i = 0; i < NUM_KMEANS_DIMENSIONS; i++) {
94  dotSum += rawData[i] * otherRawData[i];
95  }
96  return dotSum;
97  }
98 
99  /* Compute the squared distance */
100  inline double getSquaredDistance(KMeansDoubleVector& other) {
101  double* otherRawData = other.getRawData();
102  double distance = 0;
103  size_t kv = 0;
104  while (kv < NUM_KMEANS_DIMENSIONS) {
105  double score = rawData[kv] - otherRawData[kv];
106  distance += score * score;
107  kv++;
108  }
109  return distance;
110  }
111 
112 
113  void print() {
114  for (int i = 0; i < NUM_KMEANS_DIMENSIONS; i++) {
115  std::cout << i << ": " << rawData[i] << "; ";
116  }
117  std::cout << std::endl;
118  }
119 
120 
121  /*
122  * Another way to compute the squared distance
123  * Faster than the direct computing when both norms are given
124  */
126  double precision = 0.000001;
127  double myNorm = norm;
128  double otherNorm = other.norm;
129  double sumSquaredNorm = myNorm * myNorm + otherNorm * otherNorm;
130  double normDiff = myNorm - otherNorm;
131  double sqDist = 0.0;
132  double precisionBound1 =
133  2.0 * KMEANS_EPSILON * sumSquaredNorm / (normDiff * normDiff + KMEANS_EPSILON);
134  if (precisionBound1 < precision) {
135  sqDist = sumSquaredNorm - 2.0 * dot(other);
136  } else {
137  sqDist = getSquaredDistance(other);
138  }
139  return sqDist;
140  }
141 
142  /* Overload the + operator */
144  double* otherRawData = other.getRawData();
145  for (int i = 0; i < NUM_KMEANS_DIMENSIONS; i++) {
146  rawData[i] += otherRawData[i];
147  }
148  return *this;
149  }
150 
151  /* Overload the / operator */
152  inline KMeansDoubleVector& operator/(int val) {
153  if (val == 0) {
154  std::cout << "Error in KMeansDoubleVector: division by zero" << std::endl;
155  exit(-1);
156  }
157  Handle<KMeansDoubleVector> result = makeObject<KMeansDoubleVector>();
158  double* otherRawData = result->getRawData();
159  for (int i = 0; i < NUM_KMEANS_DIMENSIONS; i++) {
160  otherRawData[i] = rawData[i] / val;
161  }
162 
163  return *result;
164  }
165 
166  /* Shuffle the elements of an array to a random order */
168  for (int i = NUM_KMEANS_DIMENSIONS - 1; i >= 0; i--) {
169  int j = rand() % NUM_KMEANS_DIMENSIONS;
170  double tmp = rawData[j];
171  rawData[j] = rawData[i];
172  rawData[i] = tmp;
173  }
174  return *this;
175  }
176 
177  /* Judge if two KMeansDoubleVector is equal */
178  inline bool equals(Handle<KMeansDoubleVector>& other) {
179  double* otherRawData = other->getRawData();
180  for (int i = 0; i < NUM_KMEANS_DIMENSIONS; i++) {
181  if (rawData[i] != otherRawData[i]) {
182  return false;
183  }
184  }
185  return true;
186  }
187 
188 
190 };
191 }
192 
193 
194 #endif
#define KMEANS_EPSILON
#define ENABLE_DEEP_COPY
Definition: DeepCopy.h:52
#define NUM_KMEANS_DIMENSIONS
double getFastSquaredDistance(KMeansDoubleVector &other)
bool equals(Handle< KMeansDoubleVector > &other)
KMeansDoubleVector & operator/(int val)
double rawData[NUM_KMEANS_DIMENSIONS]
void setValues(std::vector< double > dataToMe)
double getSquaredDistance(KMeansDoubleVector &other)
double dot(KMeansDoubleVector &other)
KMeansDoubleVector & randomizeInPlace()
KMeansDoubleVector & operator+(KMeansDoubleVector &other)
void setDouble(int i, double val)