A platform for high-performance distributed tool and library development written in C++. It can be deployed in two different cluster modes: standalone or distributed. API for v0.5.0, released on June 13, 2018.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
DoubleVector.h
Go to the documentation of this file.
1 /*****************************************************************************
2  * *
3  * Copyright 2018 Rice University *
4  * *
5  * Licensed under the Apache License, Version 2.0 (the "License"); *
6  * you may not use this file except in compliance with the License. *
7  * You may obtain a copy of the License at *
8  * *
9  * http://www.apache.org/licenses/LICENSE-2.0 *
10  * *
11  * Unless required by applicable law or agreed to in writing, software *
12  * distributed under the License is distributed on an "AS IS" BASIS, *
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
14  * See the License for the specific language governing permissions and *
15  * limitations under the License. *
16  * *
17  *****************************************************************************/
18 #ifndef DOUBLE_VECTOR_H
19 #define DOUBLE_VECTOR_H
20 
21 
22 #include "Object.h"
23 #include "Handle.h"
24 #include "PDBVector.h"
25 #include "Configuration.h"
26 #include <math.h>
27 // PRELOAD %DoubleVector%
28 
29 
30 #ifndef KMEANS_EPSILON
31 #define KMEANS_EPSILON 2.22045e-16
32 #endif
33 
34 
35 namespace pdb {
36 
37 
38 //this class wraps a double vector
39 class DoubleVector : public Object {
40 
41 public:
43  size_t size = 0;
44  double norm = -1;
45 
46 public:
48  size = 0;
49  }
50 
51  DoubleVector(size_t size) {
52  this->size = size;
53  data = makeObject<Vector<double>>(size, size);
54  }
55 
57 
58  void setValues(std::vector<double> dataToMe) {
59  double* rawData = data->c_ptr();
60  if (dataToMe.size() >= size) {
61  for (int i = 0; i < size; i++) {
62  rawData[i] = dataToMe[i];
63  }
64  } else {
65  std::cout << "my size is " << size << ", and input's size is " << dataToMe.size()
66  << std::endl;
67  }
68  this->print();
69  }
70 
71  size_t getSize() {
72  return this->size;
73  }
74 
76  return data;
77  }
78 
79  double* getRawData() {
80  if (data == nullptr) {
81  return nullptr;
82  }
83  return data->c_ptr();
84  }
85 
86  double getDouble(int i) {
87  if (i < this->size) {
88  return (*data)[i];
89  } else {
90  std::cout << "Error in DoubleVector: Cannot get the value at the pos " << i
91  << std::endl;
92  ;
93  exit(-1);
94  }
95  }
96 
97  void setDouble(int i, double val) {
98  if (i < this->size) {
99  (*data)[i] = val;
100  } else {
101  std::cout << "Error in DoubleVector: Cannot assign the value " << val << "to the pos "
102  << i << std::endl;
103  exit(-1);
104  }
105  }
106 
107  // follows the implementation in Spark MLlib:
108  // https://github.com/apache/spark/blob/master/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala
109  inline double getNorm2() {
110  if (norm < 0) {
111  norm = 0;
112  double* rawData = data->c_ptr();
113  size_t mySize = this->getSize();
114  for (int i = 0; i < mySize; i++) {
115  norm += rawData[i] * rawData[i];
116  }
117  norm = sqrt(norm);
118  }
119  return norm;
120  }
121 
122  inline double dot(DoubleVector& other) {
123  size_t mySize = this->size;
124  double* rawData = data->c_ptr();
125  double* otherRawData = other.getRawData();
126  double dotSum = 0;
127  for (size_t i = 0; i < mySize; i++) {
128  dotSum += rawData[i] * otherRawData[i];
129  }
130  return dotSum;
131  }
132 
133  // computes the squared distance, following Spark MLlib
134 
135  inline double getSquaredDistance(DoubleVector& other) {
136  size_t mySize = this->getSize();
137  size_t otherSize = other.getSize();
138  double* rawData = data->c_ptr();
139  double* otherRawData = other.getRawData();
140  if (mySize != otherSize) {
141  std::cout << "Error in DoubleVector: dot size doesn't match" << std::endl;
142  exit(-1);
143  }
144  double distance = 0;
145  size_t kv = 0;
146  while (kv < mySize) {
147  double score = rawData[kv] - otherRawData[kv];
148  distance += score * score;
149  kv++;
150  }
151  return distance;
152  }
153 
154 
155  void print() {
156  double* rawData = data->c_ptr();
157  for (int i = 0; i < this->getSize(); i++) {
158  std::cout << i << ": " << rawData[i] << "; ";
159  }
160  std::cout << std::endl;
161  }
162 
163 
164  // this implementation follows Spark MLlib:
165  // https://github.com/apache/spark/blob/master/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala
166  inline double getFastSquaredDistance(DoubleVector& other) {
167  double precision = 0.00001;
168  double myNorm = norm;
169  double otherNorm = other.norm;
170  double sumSquaredNorm = myNorm * myNorm + otherNorm * otherNorm;
171  double normDiff = myNorm - otherNorm;
172  double sqDist = 0.0;
173  double precisionBound1 =
174  2.0 * KMEANS_EPSILON * sumSquaredNorm / (normDiff * normDiff + KMEANS_EPSILON);
175  if (precisionBound1 < precision) {
176  sqDist = sumSquaredNorm - 2.0 * dot(other);
177  } else {
178  sqDist = getSquaredDistance(other);
179  }
180  return sqDist;
181  }
182 
183 
185  // std :: cout << "me:" << this->getSize() << std :: endl;
186  // this->print();
187  // std :: cout << "other:" << other.getSize() << std :: endl;
188  // other.print();
189  size_t mySize = this->getSize();
190  /*size_t otherSize = other.getSize();
191  if (mySize != otherSize) {
192  std :: cout << "Error in DoubleVector: dot size doesn't match" << std :: endl;
193  exit(-1);
194  }*/
195  double* rawData = data->c_ptr();
196  double* otherRawData = other.getRawData();
197  int blockSize = 1024;
198  int numBlocks = mySize / blockSize;
199  int remainder = mySize % blockSize;
200  for (int i = 0; i < numBlocks; i++) {
201  for (int j = 0; j < blockSize; j++) {
202  rawData[i * blockSize + j] += otherRawData[i * blockSize + j];
203  }
204  }
205  for (int i = 0; i < remainder; i++) {
206  rawData[numBlocks * blockSize + i] += otherRawData[numBlocks * blockSize + i];
207  }
208  return *this;
209  }
210 
211 
212  inline DoubleVector& operator/(int val) {
213  if (val == 0) {
214  std::cout << "Error in DoubleVector: division by zero" << std::endl;
215  exit(-1);
216  }
217  size_t mySize = this->getSize();
218  Handle<DoubleVector> result = makeObject<DoubleVector>(mySize);
219  double* rawData = data->c_ptr();
220  double* otherRawData = result->getRawData();
221  for (int i = 0; i < mySize; i++) {
222  otherRawData[i] = rawData[i] / val;
223  }
224 
225  return *result;
226  }
227 
228  // Shuffle the elements of an array into a random order, modifying the original array. Returns
229  // the original array.
231  double* rawData = data->c_ptr();
232  size_t mySize = this->getSize();
233  for (int i = mySize - 1; i >= 0; i--) {
234  int j = rand() % mySize;
235  double tmp = rawData[j];
236  rawData[j] = rawData[i];
237  rawData[i] = tmp;
238  }
239  return *this;
240  }
241 
242  inline bool equals(Handle<DoubleVector>& other) {
243  size_t mySize = this->size;
244  size_t otherSize = other->getSize();
245  if (mySize != otherSize) {
246  return false;
247  }
248  double* rawData = this->getRawData();
249  double* otherRawData = other->getRawData();
250  for (int i = 0; i < mySize; i++) {
251  if (rawData[i] != otherRawData[i]) {
252  return false;
253  }
254  }
255  return true;
256  }
257 
258 
260 };
261 }
262 
263 
264 #endif
DoubleVector & operator/(int val)
Definition: DoubleVector.h:212
#define ENABLE_DEEP_COPY
Definition: DeepCopy.h:52
double getSquaredDistance(DoubleVector &other)
Definition: DoubleVector.h:135
DoubleVector & randomizeInPlace()
Definition: DoubleVector.h:230
double * getRawData()
Definition: DoubleVector.h:79
Handle< Vector< double > > & getData()
Definition: DoubleVector.h:75
bool equals(Handle< DoubleVector > &other)
Definition: DoubleVector.h:242
double dot(DoubleVector &other)
Definition: DoubleVector.h:122
DoubleVector & operator+(DoubleVector &other)
Definition: DoubleVector.h:184
DoubleVector(size_t size)
Definition: DoubleVector.h:51
Handle< Vector< double > > data
Definition: DoubleVector.h:42
void setValues(std::vector< double > dataToMe)
Definition: DoubleVector.h:58
void setDouble(int i, double val)
Definition: DoubleVector.h:97
#define KMEANS_EPSILON
Definition: DoubleVector.h:31
double getFastSquaredDistance(DoubleVector &other)
Definition: DoubleVector.h:166
double getDouble(int i)
Definition: DoubleVector.h:86