1 module lantern.describe;
2 
3 import core.math;
4 import core.time;
5 
6 import std.algorithm;
7 import std.array : Appender, appender;
8 import std.meta;
9 import std.range : put;
10 import std.traits;
11 import std.typecons;
12 
13 import mir.math.sum;
14 
15 import lantern.util;
16 
/**
 * Compile-time check that `T` can be used as an aggregator.
 *
 * The check passes when all of the following type-check:
 * $(UL
 *   $(LI `T` can be declared as a variable and exposes a nested type `T.DataType`;)
 *   $(LI a `T.DataType` value can be fed to an instance through the module-level `put`;)
 *   $(LI `result()` yields something whose `count` member initializes a `size_t`.)
 * )
 */
enum isAggregator(T) = is(typeof({
            T candidate;

            static assert(is(T.DataType));

            // Left uninitialized on purpose: this body sits inside `typeof`
            // and is only type-checked.
            T.DataType sample = void;
            .put(candidate, sample);

            auto summary = candidate.result();
            size_t total = summary.count;
        }));
28 
/**
 * Aggregator for numeric values of type `T`.
 *
 * Every value passed to `put` is buffered; `result` sorts the buffer and
 * derives the count, extrema, quartiles, mean and sample standard deviation.
 */
struct NumericAggregator(T)
{
    alias DataType = T;

    /// Summary statistics. Every field except `count` is null when no value was added.
    struct Result
    {
        size_t count;
        Nullable!DataType min;
        Nullable!DataType max;
        Nullable!real p25;
        Nullable!real p50;
        Nullable!real p75;
        Nullable!real mean;
        /// Sample standard deviation (divisor `n - 1`); null when fewer than two values exist.
        Nullable!real std;
    }

    size_t count = 0;
    Appender!(T[]) buffer;

    /**
     * Computes the summary of all values added so far.
     *
     * Note: the buffered values are sorted in place.
     *
     * Returns: a `Result`; with no data only `count` (zero) is meaningful and
     *          all other fields are null.
     */
    Result result()
    {
        if (count == 0)
        {
            enum noneData = Nullable!DataType.init;
            enum none = Nullable!real.init;
            return Result(count, noneData, noneData, none, none, none, none, none);
        }

        auto data = buffer.data;
        immutable size = data.length;
        data.sort();

        Summator!(real, Summation.fast) summator = 0;
        .put(summator, data);
        immutable real mean = summator.sum() / size;

        // The sample deviation divides by (size - 1); with a single value
        // that is 0/0, so the deviation is left null instead of NaN.
        Nullable!real deviation;
        if (size > 1)
        {
            summator = 0;
            .put(summator, data.map!(a => (a - mean) ^^ 2));
            deviation = sqrt(summator.sum() / (size - 1));
        }

        return Result(count, nullable(data[0]), nullable(data[$ - 1]),
                nullable(quantile(data, 0.25L)), nullable(quantile(data, 0.5L)),
                nullable(quantile(data, 0.75L)), nullable(mean), deviation);
    }

    /// Adds one value to the aggregation.
    void put(T value)
    {
        count++;
        .put(buffer, value);
    }

    /*
     * Linearly interpolated quantile of an ascending, non-empty slice.
     * `fraction` is the quantile in [0, 1]; the position is
     * `(length - 1) * fraction` and neighbouring elements are blended when
     * the position is not a whole index.
     */
    private static real quantile(const(T)[] sorted, real fraction)
    {
        import std.math : floor;

        immutable real position = (sorted.length - 1) * fraction;
        immutable lower = cast(size_t) floor(position);
        if (lower == position)
            return sorted[lower];

        immutable real weight = position - lower;
        return (1 - weight) * sorted[lower] + weight * sorted[lower + 1];
    }
}
111 
// Integral and floating-point instantiations both satisfy the aggregator contract.
unittest
{
    static foreach (Element; AliasSeq!(int, float))
    {
        static assert(isAggregator!(NumericAggregator!Element));
    }
}
117 
// An aggregator that never received a value reports count 0 and null statistics.
unittest
{
    NumericAggregator!int untouched;
    auto summary = untouched.result();

    assert(summary.count == 0);

    assert(summary.mean.isNull);
    assert(summary.std.isNull);

    assert(summary.min.isNull);
    assert(summary.p25.isNull);
    assert(summary.p50.isNull);
    assert(summary.p75.isNull);
    assert(summary.max.isNull);
}
131 
// Odd-sized input: every quartile lands exactly on an element.
unittest
{
    import std.math : approxEqual;
    import std.conv : to;

    NumericAggregator!int aggregator;

    .put(aggregator, [1, 2, 3, 4, 5]);

    auto result = aggregator.result();
    static assert(is(typeof(result) == typeof(aggregator).Result));
    assert(result.count == 5);
    assert(result.mean == 3);
    assert(approxEqual(result.std.get, 1.581139), result.std.get.to!string);
    assert(result.min == 1, result.min.to!string());
    assert(result.p25 == 2, result.p25.to!string());
    assert(result.p50 == 3, result.p50.to!string());
    assert(result.p75 == 4, result.p75.to!string());
    // The failure message reports the value under test (max).
    assert(result.max == 5, result.max.to!string());
}
152 
// Even-sized input: the quartiles fall between elements and are interpolated.
unittest
{
    import std.math : approxEqual;
    import std.conv : to;

    NumericAggregator!int numbers;

    .put(numbers, [1, 2, 3, 4, 5, 6]);

    auto summary = numbers.result();
    static assert(is(typeof(summary) == typeof(numbers).Result));

    assert(summary.count == 6);
    assert(summary.mean == 3.5);
    assert(approxEqual(summary.std.get, 1.870829));

    assert(summary.min == 1, summary.min.to!string());
    assert(summary.p25 == 2.25, summary.p25.to!string());
    assert(summary.p50 == 3.5, summary.p50.to!string());
    assert(summary.p75 == 4.75, summary.p75.to!string());
    assert(summary.max == 6, summary.max.to!string());
}
173 
/**
 * Aggregator for duration values.
 *
 * Each value is reduced to its total number of hnsecs and forwarded to an
 * inner `NumericAggregator!long`; the inner statistics are mapped back with
 * `toDuration` when the result is requested.
 */
struct DurationAggregator(T)
{
    alias DataType = T;

    /// Same layout as the numeric result, with every statistic typed as `DataType`.
    struct Result
    {
        size_t count;
        Nullable!DataType min;
        Nullable!DataType max;
        Nullable!DataType p25;
        Nullable!DataType p50;
        Nullable!DataType p75;
        Nullable!DataType mean;
        Nullable!DataType std;
    }

    NumericAggregator!long aggregator;

    /// Converts the statistics of the inner numeric aggregator back to durations.
    Result result()
    {
        auto raw = aggregator.result();

        auto lowest = raw.min.toDuration();
        auto highest = raw.max.toDuration();
        auto lowerQuartile = raw.p25.toDuration();
        auto median = raw.p50.toDuration();
        auto upperQuartile = raw.p75.toDuration();
        auto average = raw.mean.toDuration();
        auto deviation = raw.std.toDuration();

        return Result(raw.count, lowest, highest, lowerQuartile, median,
                upperQuartile, average, deviation);
    }

    /// Adds one value, stored as its hnsecs total.
    void put(T value)
    {
        long ticks = value.total!"hnsecs";
        .put(aggregator, ticks);
    }
}
207 
// The duration aggregator satisfies the aggregator contract.
unittest
{
    import core.time : Duration;

    alias Candidate = DurationAggregator!Duration;
    static assert(isAggregator!Candidate);
}
214 
// Three evenly spaced durations: mean 2 s, sample deviation 1 s.
unittest
{
    import core.time;

    DurationAggregator!Duration aggregator;

    .put(aggregator, [1.seconds, 2.seconds, 3.seconds]);

    auto result = aggregator.result();

    assert(result.count == 3);
    assert(result.min == 1.seconds);
    assert(result.max == 3.seconds);
    assert(result.mean == 2.seconds);
    assert(result.std == 1.seconds);
}
232 
/**
 * Aggregator for categorical values of type `T`.
 *
 * Keeps a per-value occurrence count and reports the total number of values,
 * the number of distinct values, and the most frequent value with its
 * frequency.
 */
struct CategoricalAggregator(T)
{
    alias DataType = T;

    /// Summary of the values seen so far; `top` is null when nothing was added.
    struct Result
    {
        size_t count;
        size_t unique;
        Nullable!T top;
        size_t freq;
    }

    size_t count;
    size_t[T] counts;

    // True when two `T` values can be compared with `<`; enables
    // deterministic tie-breaking in `result`.
    private enum bool hasOrdering = is(typeof(T.init < T.init) : bool);

    /**
     * Builds the summary.
     *
     * When several values share the highest frequency and `T` supports `<`,
     * the smallest of them is reported as `top`, so the answer does not depend
     * on the iteration order of the associative array.
     */
    Result result()
    {
        size_t topCount;
        Nullable!T topKey;
        foreach (key, occurrences; counts)
        {
            bool better = occurrences > topCount;

            static if (hasOrdering)
            {
                // Tie: prefer the smaller key.
                if (!better && occurrences == topCount && !topKey.isNull && key < topKey.get)
                    better = true;
            }

            if (better)
            {
                topCount = occurrences;
                topKey = key;
            }
        }

        return Result(count, counts.length, topKey, topCount);
    }

    /// Adds one value.
    void put(T value)
    {
        count++;
        counts[value]++;
    }
}
273 
// A categorical aggregator without input reports zeros and a null top value.
unittest
{
    enum Letter
    {
        A,
        B,
        C
    }

    CategoricalAggregator!Letter tally;

    auto summary = tally.result();
    assert(summary.count == 0);
    assert(summary.unique == 0);
    assert(summary.top.isNull);
    assert(summary.freq == 0);
}
291 
// Enum input with a clear winner: A occurs three times, B once.
unittest
{
    enum Letter
    {
        A,
        B,
        C
    }

    CategoricalAggregator!Letter tally;

    .put(tally, [Letter.A, Letter.A, Letter.A, Letter.B]);

    auto summary = tally.result();
    assert(summary.count == 4);
    assert(summary.unique == 2);
    assert(summary.top == Letter.A);
    assert(summary.freq == 3);
}
311 
// String input: three distinct keys, each occurring twice.
unittest
{
    CategoricalAggregator!string tally;

    .put(tally, ["A", "A", "B", "B", "C", "C"]);

    auto summary = tally.result();
    assert(summary.count == 6);
    assert(summary.unique == 3);
    assert(summary.top == "A");
    assert(summary.freq == 2);
}
324 
/**
 * Aggregator for ordered values such as points in time.
 *
 * In addition to the categorical statistics (count, distinct values, most
 * frequent value and its frequency) it reports the smallest (`first`) and
 * largest (`last`) value seen. `T` must support `<` and `>`.
 */
struct SeriesAggregator(T)
{
    alias DataType = T;

    /// Summary of the values seen so far; `top`, `first` and `last` are null when nothing was added.
    struct Result
    {
        size_t count;
        size_t unique;
        Nullable!DataType top;
        size_t freq;
        Nullable!DataType first;
        Nullable!DataType last;
    }

    size_t count;
    size_t[DataType] counts;

    /**
     * Builds the summary.
     *
     * When several values share the highest frequency, the smallest of them is
     * reported as `top`, so the answer does not depend on the iteration order
     * of the associative array.
     */
    Result result()
    {
        size_t topCount;
        Nullable!DataType topKey;
        Nullable!DataType first;
        Nullable!DataType last;
        foreach (key, occurrences; counts)
        {
            immutable bool tieWithSmallerKey = occurrences == topCount
                && !topKey.isNull && key < topKey.get();

            if (occurrences > topCount || tieWithSmallerKey)
            {
                topCount = occurrences;
                topKey = key;
            }
            if (first.isNull || key < first.get())
            {
                first = key;
            }
            if (last.isNull || key > last.get())
            {
                last = key;
            }
        }

        return Result(count, counts.length, topKey, topCount, first, last);
    }

    /// Adds one value.
    void put(DataType value)
    {
        count++;
        counts[value]++;
    }
}
376 
// The series aggregator satisfies the aggregator contract for the common time types.
unittest
{
    import core.time : MonoTime;
    import std.datetime : DateTime, Date, TimeOfDay, SysTime;

    static foreach (Moment; AliasSeq!(MonoTime, DateTime, Date, TimeOfDay, SysTime))
    {
        static assert(isAggregator!(SeriesAggregator!Moment));
    }
}
388 
// Five timestamps (one of them duplicated) are all counted.
unittest
{
    import std.datetime : SysTime, Date, UTC;

    // January 1st of the given year, in UTC.
    static SysTime newYear(int year)
    {
        return SysTime(Date(year, 1, 1), UTC());
    }

    SeriesAggregator!SysTime series;

    .put(series, [
            newYear(1990), newYear(1990), newYear(2000), newYear(2010),
            newYear(2020),
            ]);

    auto summary = series.result();
    assert(summary.count == 5);
}
404 
405 
/// Default configuration type for `describe`.
///
/// `AggregatorResolver` names the resolver template used to pick an
/// aggregator per record member; here it is `DefaultResolver`.
struct DescribeConfig
{
    alias AggregatorResolver = DefaultResolver;
}
410 
/*
 * Picks the resolver template for `Config`.
 *
 * `Config.AggregatorResolver` is used when it can be instantiated with a
 * record type, then with a member name, and the outcome satisfies
 * `isAggregator`. Otherwise `DefaultResolver` is used.
 */
private template GetResolver(Config)
{
    static if (!__traits(compiles, {
            // Minimal record used to probe the configured resolver.
            static struct Probe
            {
                int n;
            }

            alias PerRecord = Config.AggregatorResolver;
            alias PerField = PerRecord!Probe;
            alias Chosen = PerField!"n";
            static assert(isAggregator!Chosen);
        }))
    {
        alias GetResolver = DefaultResolver;
    }
    else
    {
        alias GetResolver = Config.AggregatorResolver;
    }
}
432 
// The default configuration resolves to `DefaultResolver`, and the resolved
// template is usable on a simple record.
unittest
{
    alias SimpleResolver = GetResolver!DescribeConfig;
    static assert(__traits(isSame, SimpleResolver, DefaultResolver));

    static struct Probe
    {
        int n;
    }

    alias PerField = SimpleResolver!Probe;
    static assert(is(PerField!"n" == NumericAggregator!int));
}
438 
/**
 * Summarizes every aggregatable member of the records in `datalist`.
 *
 * For each member of the record type for which the configured resolver yields
 * a type satisfying `isAggregator`, one aggregator is created and fed the
 * member's value from every record. Members without such an aggregator are
 * skipped.
 *
 * Params:
 *   datalist = the records to summarize; iterated once with `foreach`
 *
 * Returns: a struct with one field per aggregated member, named after the
 *          member and holding that aggregator's `Result`.
 */
auto describe(R, Config = DescribeConfig)(auto ref R datalist)
{
    import std.range : ElementType;

    alias Record = Unqual!(ElementType!R);

    alias PerRecord = GetResolver!Config;
    alias PerField = PerRecord!Record;

    // True when the resolver produces a valid aggregator for the member `field`.
    enum hasAggregator(string field) = __traits(compiles, {
            alias Candidate = PerField!field;
            static assert(isAggregator!Candidate);
        });

    alias FieldNames = Filter!(hasAggregator, __traits(allMembers, Record));
    alias RecordAggregators = staticMap!(PerField, FieldNames);

    // One aggregator instance per selected member, in `FieldNames` order.
    RecordAggregators perField;
    foreach (record; datalist)
    {
        static foreach (slot, field; FieldNames)
        {
            .put(perField[slot], __traits(getMember, record, field));
        }
    }

    // Result type: one field per aggregated member, named after it.
    static struct Results
    {
        static foreach (i, name; FieldNames)
        {
            mixin(`RecordAggregators[i].Result ` ~ name ~ ";");
        }
    }

    Results summary;
    static foreach (position, member; FieldNames)
    {
        __traits(getMember, summary, member) = perField[position].result();
    }
    return summary;
}
480 
///
unittest
{
    enum Phase
    {
        Uninitialized,
        Running,
        Finish,
    }

    struct Job
    {
        Object obj;
        string text;
        int number;
        Duration span;
        bool flag;
        Phase state;
    }

    auto jobs = [
        Job(null, "A", 10, 10.msecs, true, Phase.Uninitialized),
        Job(null, "A", 20, 20.msecs, true, Phase.Running),
        Job(null, "B", 30, 30.msecs, false, Phase.Uninitialized),
        Job(null, "B", 40, 40.msecs, false, Phase.Finish),
        Job(null, "B", 50, 50.msecs, true, Phase.Uninitialized),
        Job(null, "B", 60, 60.msecs, false, Phase.Running),
    ];

    auto summary = describe(jobs);

    // string member
    assert(summary.text.count == 6);
    assert(summary.text.unique == 2);

    // numeric member
    assert(summary.number.count == 6);
    assert(summary.number.min == 10);
    assert(summary.number.max == 60);

    // duration member
    assert(summary.span.count == 6);
    assert(summary.span.min == 10.msecs);
    assert(summary.span.max == 60.msecs);

    // bool member
    assert(summary.flag.count == 6);
    assert(summary.flag.unique == 2);
    assert(summary.flag.top == false);
    assert(summary.flag.freq == 3);

    // enum member
    assert(summary.state.count == 6);
    assert(summary.state.top == Phase.Uninitialized);
    assert(summary.state.freq == 3);
}
526 
527 import std.datetime;
528 
/**
 * Default mapping from a record member to its aggregator type.
 *
 * Instantiate with the record type `T`, then with a member name. The
 * member's unqualified type selects the aggregator:
 * $(UL
 *   $(LI enums, `bool` and string types use `CategoricalAggregator`;)
 *   $(LI `Duration` uses `DurationAggregator`;)
 *   $(LI `SysTime`, `DateTime`, `Date` and `TimeOfDay` use `SeriesAggregator`;)
 *   $(LI other numeric types use `NumericAggregator`.)
 * )
 * Any other member type is rejected with a `static assert`.
 */
template DefaultResolver(T)
{
    alias MemberType = MemberTypeOf!T;

    template DefaultResolver(string name)
    {
        alias DataType = Unqual!(MemberType!name);

        // Enums are tested before `isNumeric`, so they are treated as categories.
        static if (is(DataType == enum) || is(DataType == bool) || isSomeString!DataType)
        {
            alias DefaultResolver = CategoricalAggregator!DataType;
        }
        else static if (is(DataType == Duration))
        {
            alias DefaultResolver = DurationAggregator!DataType;
        }
        else static if (is(DataType == SysTime) || is(DataType == DateTime)
                || is(DataType == Date) || is(DataType == TimeOfDay))
        {
            alias DefaultResolver = SeriesAggregator!DataType;
        }
        else static if (isNumeric!DataType)
        {
            alias DefaultResolver = NumericAggregator!DataType;
        }
        else
        {
            static assert(false, "DefaultResolver: no default aggregator for member `"
                    ~ name ~ "` of type " ~ DataType.stringof);
        }
    }
}
560 
// The default resolver maps every supported member type to the expected aggregator.
unittest
{
    import std.datetime : SysTime, DateTime, Date, TimeOfDay;

    enum Phase
    {
        A,
        B,
        C
    }

    struct Sample
    {
        bool flag;
        Phase state;
        byte n1;
        ubyte n2;
        int n3;
        long n4;
        float f1;
        double f2;
        real f3;
        Duration d;
        SysTime t1;
        DateTime t2;
        Date t3;
        TimeOfDay t4;
        string s1;
        wstring s2;
        dstring s3;
    }

    static assert(isSomeString!dstring);

    alias Pick = DefaultResolver!Sample;

    // categorical: bool and enum
    static assert(is(Pick!"flag" == CategoricalAggregator!bool));
    static assert(is(Pick!"state" == CategoricalAggregator!Phase));

    // numeric: integral types
    static assert(is(Pick!"n1" == NumericAggregator!byte));
    static assert(is(Pick!"n2" == NumericAggregator!ubyte));
    static assert(is(Pick!"n3" == NumericAggregator!int));
    static assert(is(Pick!"n4" == NumericAggregator!long));

    // numeric: floating-point types
    static assert(is(Pick!"f1" == NumericAggregator!float));
    static assert(is(Pick!"f2" == NumericAggregator!double));
    static assert(is(Pick!"f3" == NumericAggregator!real));

    // duration
    static assert(is(Pick!"d" == DurationAggregator!Duration));

    // series: date and time types
    static assert(is(Pick!"t1" == SeriesAggregator!SysTime));
    static assert(is(Pick!"t2" == SeriesAggregator!DateTime));
    static assert(is(Pick!"t3" == SeriesAggregator!Date));
    static assert(is(Pick!"t4" == SeriesAggregator!TimeOfDay));

    // categorical: string types
    static assert(is(Pick!"s1" == CategoricalAggregator!string));
    static assert(is(Pick!"s2" == CategoricalAggregator!wstring));
    static assert(is(Pick!"s3" == CategoricalAggregator!dstring));
}
614 
// Holds a `NumericAggregator!double` in its `result` field.
struct DescribeResult
{
    NumericAggregator!double result;

    // NOTE(review): always returns an empty string; this looks like a
    // placeholder — confirm the intended output.
    string toString() const
    {
        return "";
    }
}