module lantern.describe;

import core.math;
import core.time;

import std.algorithm;
import std.array : Appender, appender;
import std.datetime;
import std.meta;
import std.range : put;
import std.traits;
import std.typecons;

import mir.math.sum;

import lantern.util;

/**
Detects whether `T` can be used as an aggregator.

`T` qualifies when it declares a `DataType`, accepts a `DataType` value
through `put`, and provides a `result()` whose return value exposes a
`count` that can initialise a `size_t`.
*/
enum isAggregator(T) = is(typeof({
    T aggregator;

    static assert(is(T.DataType));

    T.DataType value = void;
    .put(aggregator, value);

    auto result = aggregator.result();
    size_t count = result.count;
}));

/**
Collects numeric values and summarises them: count, minimum, maximum,
quartiles (linear interpolation between neighbouring samples), mean and
sample standard deviation (divisor `n - 1`).
*/
struct NumericAggregator(T)
{
    alias DataType = T;

    /// Summary of the collected values; every statistic is null when no value was put.
    struct Result
    {
        size_t count;
        Nullable!DataType min;
        Nullable!DataType max;
        Nullable!real p25;
        Nullable!real p50;
        Nullable!real p75;
        Nullable!real mean;
        Nullable!real std;
    }

    /// Number of values received through `put`.
    size_t count = 0;

    /// Every value received so far; sorted in place by `result()`.
    Appender!(T[]) buffer;

    /**
    Computes the summary of all values collected so far.

    The internal buffer is sorted in place as a side effect.

    Note: with exactly one sample the divisor `size - 1` is zero, so `std`
    is a non-null value that is not a meaningful number (0/0 in floating
    point).

    Returns: a `Result`; all statistics are null when `count == 0`.
    */
    Result result()
    {
        if (count == 0)
        {
            enum noneData = Nullable!DataType.init;
            enum none = Nullable!real.init;
            return Result(count, noneData, noneData, none, none, none, none, none);
        }

        auto data = buffer.data;
        immutable size = data.length;
        data.sort();

        Summator!(real, Summation.fast) summator = 0;
        .put(summator, data);
        real mean = summator.sum() / size;

        // Second pass: sum of squared deviations from the mean.
        summator = 0;
        .put(summator, data.map!(a => (a - mean) ^^ 2));
        real deviation = sqrt(summator.sum() / (size - 1));

        return Result(count, nullable(data[0]), nullable(data[$ - 1]),
                nullable(percentile(data, 0.25)), nullable(percentile(data, 0.5)),
                nullable(percentile(data, 0.75)), nullable(mean), nullable(deviation));
    }

    /// Stores one value for later summarisation.
    void put(T value)
    {
        count++;
        .put(buffer, value);
    }

    /*
    Returns the value at the given fraction (0 .. 1) of an ascending,
    non-empty slice, interpolating linearly between the two neighbouring
    samples when the position is not a whole index.
    */
    private static real percentile(const(T)[] sorted, double fraction)
    {
        import std.math : floor;

        real position = (sorted.length - 1) * fraction;
        size_t lower = cast(size_t) floor(position);

        // Exact hit on an index: no interpolation (and no read past the end).
        if (lower == position)
            return sorted[lower];

        auto weight = position - lower;
        return (1 - weight) * sorted[lower] + weight * sorted[lower + 1];
    }
}

unittest
{
    static assert(isAggregator!(NumericAggregator!int));
    static assert(isAggregator!(NumericAggregator!float));
}

// An empty aggregator reports a zero count and null statistics.
unittest
{
    NumericAggregator!int aggregator;
    auto result = aggregator.result();
    assert(result.count == 0);
    assert(result.mean.isNull);
    assert(result.std.isNull);
    assert(result.min.isNull);
    assert(result.p25.isNull);
    assert(result.p50.isNull);
    assert(result.p75.isNull);
    assert(result.max.isNull);
}

// Odd number of samples: quartiles land exactly on samples.
unittest
{
    import std.math : approxEqual;
    import std.conv : to;

    NumericAggregator!int aggregator;

    .put(aggregator, [1, 2, 3, 4, 5]);

    auto result = aggregator.result();
    static assert(is(typeof(result) == typeof(aggregator).Result));
    assert(result.count == 5);
    assert(result.mean == 3);
    assert(approxEqual(result.std.get, 1.581139), result.std.get.to!string);
    assert(result.min == 1, result.min.to!string());
    assert(result.p25 == 2, result.p25.to!string());
    assert(result.p50 == 3, result.p50.to!string());
    assert(result.p75 == 4, result.p75.to!string());
    assert(result.max == 5, result.max.to!string());
}

// Even number of samples: quartiles are interpolated.
unittest
{
    import std.math : approxEqual;
    import std.conv : to;

    NumericAggregator!int aggregator;

    .put(aggregator, [1, 2, 3, 4, 5, 6]);

    auto result = aggregator.result();
    static assert(is(typeof(result) == typeof(aggregator).Result));
    assert(result.count == 6);
    assert(result.mean == 3.5);
    assert(approxEqual(result.std.get, 1.870829));
    assert(result.min == 1, result.min.to!string());
    assert(result.p25 == 2.25, result.p25.to!string());
    assert(result.p50 == 3.5, result.p50.to!string());
    assert(result.p75 == 4.75, result.p75.to!string());
    assert(result.max == 6, result.max.to!string());
}

/**
Summarises duration values by feeding their total hnsecs into a
`NumericAggregator!long` and converting each statistic back with
`toDuration` (declared outside this file).
*/
struct DurationAggregator(T)
{
    alias DataType = T;

    /// Same statistics as `NumericAggregator.Result`, all expressed as `DataType`.
    struct Result
    {
        size_t count;
        Nullable!DataType min;
        Nullable!DataType max;
        Nullable!DataType p25;
        Nullable!DataType p50;
        Nullable!DataType p75;
        Nullable!DataType mean;
        Nullable!DataType std;
    }

    /// Underlying aggregator working on hnsecs.
    NumericAggregator!long aggregator;

    /// Computes the numeric summary and converts every statistic with `toDuration`.
    Result result()
    {
        auto inner = aggregator.result();

        return Result(inner.count, inner.min.toDuration(),
                inner.max.toDuration(), inner.p25.toDuration(),
                inner.p50.toDuration(), inner.p75.toDuration(),
                inner.mean.toDuration(), inner.std.toDuration());
    }

    /// Records one value as its total number of hnsecs.
    void put(T value)
    {
        .put(aggregator, value.total!"hnsecs");
    }
}

unittest
{
    import core.time : Duration;

    static assert(isAggregator!(DurationAggregator!Duration));
}

unittest
{
    import core.time;

    DurationAggregator!Duration aggregator;

    .put(aggregator, [1.seconds, 2.seconds, 3.seconds]);

    auto result = aggregator.result();

    assert(result.count == 3);
    assert(result.min == 1.seconds);
    assert(result.max == 3.seconds);
    assert(result.mean == 2.seconds);
    assert(result.std == 1.seconds);
}

/**
Counts occurrences of discrete values and reports the total count, the
number of distinct values, and the most frequent value with its frequency.
*/
struct CategoricalAggregator(T)
{
    alias DataType = T;

    /// Summary of the collected values; `top` is null when nothing was put.
    struct Result
    {
        size_t count;
        size_t unique;
        Nullable!T top;
        size_t freq;
    }

    /// Number of values received through `put`.
    size_t count;

    /// Occurrences per distinct value.
    size_t[T] counts;

    /**
    Computes the summary of all values collected so far.

    When several values share the highest frequency and `T` supports `<`,
    the smallest of them is reported, so the outcome does not depend on
    associative-array iteration order. For types without `<`, the first
    such value met during iteration is reported.
    */
    Result result()
    {
        size_t keyCount;
        size_t topCount;
        Nullable!T topKey;
        foreach (key, occurrences; counts)
        {
            keyCount++;

            bool better = occurrences > topCount;
            static if (is(typeof(T.init < T.init) : bool))
            {
                // Deterministic tie-break: prefer the smaller key.
                if (!better && occurrences == topCount && !topKey.isNull && key < topKey.get)
                    better = true;
            }

            if (better)
            {
                topCount = occurrences;
                topKey = key;
            }
        }

        return Result(count, keyCount, topKey, topCount);
    }

    /// Records one occurrence of `value`.
    void put(T value)
    {
        count++;
        counts[value]++;
    }
}

unittest
{
    enum Test
    {
        A,
        B,
        C
    }

    CategoricalAggregator!Test aggregator;

    auto result = aggregator.result();
    assert(result.count == 0);
    assert(result.unique == 0);
    assert(result.top.isNull);
    assert(result.freq == 0);
}

unittest
{
    enum Test
    {
        A,
        B,
        C
    }

    CategoricalAggregator!Test aggregator;

    .put(aggregator, [Test.A, Test.A, Test.A, Test.B]);

    auto result = aggregator.result();
    assert(result.count == 4);
    assert(result.unique == 2);
    assert(result.top == Test.A);
    assert(result.freq == 3);
}

// Three-way tie: the smallest key wins.
unittest
{
    CategoricalAggregator!string aggregator;

    .put(aggregator, ["A", "A", "B", "B", "C", "C"]);

    auto result = aggregator.result();
    assert(result.count == 6);
    assert(result.unique == 3);
    assert(result.top == "A");
    assert(result.freq == 2);
}

/**
Like `CategoricalAggregator`, but additionally reports the smallest
(`first`) and largest (`last`) value seen, compared with `<` and `>`.
*/
struct SeriesAggregator(T)
{
    alias DataType = T;

    /// Summary of the collected values; `top`, `first` and `last` are null when nothing was put.
    struct Result
    {
        size_t count;
        size_t unique;
        Nullable!DataType top;
        size_t freq;
        Nullable!DataType first;
        Nullable!DataType last;
    }

    /// Number of values received through `put`.
    size_t count;

    /// Occurrences per distinct value.
    size_t[DataType] counts;

    /**
    Computes the summary of all values collected so far.

    When several values share the highest frequency, the smallest of them
    is reported as `top`, so the outcome does not depend on
    associative-array iteration order.
    */
    Result result()
    {
        size_t keyCount;
        size_t topCount;
        Nullable!DataType topKey;
        Nullable!DataType first;
        Nullable!DataType last;
        foreach (key, occurrences; counts)
        {
            keyCount++;

            // Higher frequency wins; on a tie the smaller key wins.
            if (occurrences > topCount
                    || (occurrences == topCount && !topKey.isNull && key < topKey.get()))
            {
                topCount = occurrences;
                topKey = key;
            }
            if (first.isNull || key < first.get())
            {
                first = key;
            }
            if (last.isNull || key > last.get())
            {
                last = key;
            }
        }

        return Result(count, keyCount, topKey, topCount, first, last);
    }

    /// Records one occurrence of `value`.
    void put(DataType value)
    {
        count++;
        counts[value]++;
    }
}

unittest
{
    import core.time : MonoTime;
    import std.datetime : DateTime, Date, TimeOfDay, SysTime;

    static assert(isAggregator!(SeriesAggregator!MonoTime));
    static assert(isAggregator!(SeriesAggregator!DateTime));
    static assert(isAggregator!(SeriesAggregator!Date));
    static assert(isAggregator!(SeriesAggregator!TimeOfDay));
    static assert(isAggregator!(SeriesAggregator!SysTime));
}

unittest
{
    import std.datetime : SysTime, Date, UTC;

    SeriesAggregator!SysTime aggregator;

    .put(aggregator, [
        SysTime(Date(1990, 1, 1), UTC()), SysTime(Date(1990, 1, 1), UTC()),
        SysTime(Date(2000, 1, 1), UTC()), SysTime(Date(2010, 1, 1), UTC()),
        SysTime(Date(2020, 1, 1), UTC()),
    ]);

    auto result = aggregator.result();
    assert(result.count == 5);
}

/**
Default configuration for `describe`.

`AggregatorResolver` is a template that takes a record type and yields a
template mapping a member name to the aggregator type for that member.
*/
struct DescribeConfig
{
    alias AggregatorResolver = DefaultResolver;
}

/*
Picks the resolver to use for a configuration type: `Config.AggregatorResolver`
when it can be instantiated for a probe record and produces an aggregator
for its `int` member, otherwise `DefaultResolver`.
*/
private template GetResolver(Config)
{
    static if (__traits(compiles, {
            static struct Test
            {
                int n;
            }

            alias ResolverOf = Config.AggregatorResolver;
            alias Resolver = ResolverOf!Test;
            alias Aggregator = Resolver!"n";
            static assert(isAggregator!Aggregator);
        }))
    {
        alias GetResolver = Config.AggregatorResolver;
    }
    else
    {
        alias GetResolver = DefaultResolver;
    }
}

// Checks only that the resolver lookup instantiates for the default configuration.
unittest
{
    alias SimpleResolver = GetResolver!DescribeConfig;

}

/**
Summarises every aggregatable member of the records in `datalist`.

Members for which the resolver yields no aggregator (the resolver fails to
compile or its result is not an aggregator) are skipped.

Params:
    datalist = range of records; its element type (qualifiers stripped)
               determines which members are summarised.

Returns: a struct with one field per summarised member, named after the
member and holding that member's aggregator `Result`.
*/
auto describe(R, Config = DescribeConfig)(auto ref R datalist)
{
    import std.range : ElementType;

    alias RecordType = Unqual!(ElementType!R);

    alias ResolverOf = GetResolver!Config;
    alias Resolver = ResolverOf!RecordType;
    enum canAggregate(string name) = __traits(compiles, {
            alias T = Resolver!name;
            static assert(isAggregator!T);
        });

    alias AggregateNames = Filter!(canAggregate, __traits(allMembers, RecordType));
    alias RecordAggregators = staticMap!(Resolver, AggregateNames);

    // One aggregator per summarised member, in the order of AggregateNames.
    RecordAggregators aggregators;
    foreach (data; datalist)
    {
        static foreach (i, name; AggregateNames)
        {
            .put(aggregators[i], __traits(getMember, data, name));
        }
    }

    static struct Results
    {
        static foreach (i, name; AggregateNames)
        {
            mixin(`RecordAggregators[i].Result ` ~ name ~ ";");
        }
    }

    Results results;
    static foreach (i, name; AggregateNames)
    {
        __traits(getMember, results, name) = aggregators[i].result();
    }
    return results;
}

///
unittest
{
    enum State
    {
        Uninitialized,
        Running,
        Finish,
    }

    struct Test
    {
        Object obj;
        string text;
        int number;
        Duration span;
        bool flag;
        State state;
    }

    auto result = describe([
        Test(null, "A", 10, 10.msecs, true, State.Uninitialized),
        Test(null, "A", 20, 20.msecs, true, State.Running),
        Test(null, "B", 30, 30.msecs, false, State.Uninitialized),
        Test(null, "B", 40, 40.msecs, false, State.Finish),
        Test(null, "B", 50, 50.msecs, true, State.Uninitialized),
        Test(null, "B", 60, 60.msecs, false, State.Running),
    ]);

    assert(result.text.count == 6);
    assert(result.text.unique == 2);
    assert(result.number.count == 6);
    assert(result.number.min == 10);
    assert(result.number.max == 60);
    assert(result.span.count == 6);
    assert(result.span.min == 10.msecs);
    assert(result.span.max == 60.msecs);
    assert(result.flag.count == 6);
    assert(result.flag.unique == 2);
    assert(result.flag.top == false);
    assert(result.flag.freq == 3);
    assert(result.state.count == 6);
    assert(result.state.top == State.Uninitialized);
    assert(result.state.freq == 3);
}

/**
Default mapping from a record member to its aggregator:

$(UL
    $(LI enums, `bool` and string types: `CategoricalAggregator`)
    $(LI `Duration`: `DurationAggregator`)
    $(LI `SysTime`, `DateTime`, `Date`, `TimeOfDay`: `SeriesAggregator`)
    $(LI other numeric types: `NumericAggregator`)
)

Any other member type fails to compile, which `describe` uses to skip the
member.
*/
template DefaultResolver(T)
{
    alias MemberType = MemberTypeOf!T;

    template DefaultResolver(string name)
    {
        alias DataType = Unqual!(MemberType!name);

        static if (is(DataType == enum) || is(DataType == bool) || isSomeString!DataType)
        {
            alias DefaultResolver = CategoricalAggregator!DataType;
        }
        else static if (is(DataType == Duration))
        {
            alias DefaultResolver = DurationAggregator!DataType;
        }
        else static if (is(DataType == SysTime) || is(DataType == DateTime)
                || is(DataType == Date) || is(DataType == TimeOfDay))
        {
            alias DefaultResolver = SeriesAggregator!DataType;
        }
        else static if (isNumeric!DataType)
        {
            alias DefaultResolver = NumericAggregator!DataType;
        }
        else
        {
            static assert(false, "No default aggregator for member '" ~ name
                    ~ "' of type " ~ DataType.stringof);
        }
    }
}

unittest
{
    import std.datetime : SysTime, DateTime, Date, TimeOfDay;

    enum State
    {
        A,
        B,
        C
    }

    struct Test
    {
        bool flag;
        State state;
        byte n1;
        ubyte n2;
        int n3;
        long n4;
        float f1;
        double f2;
        real f3;
        Duration d;
        SysTime t1;
        DateTime t2;
        Date t3;
        TimeOfDay t4;
        string s1;
        wstring s2;
        dstring s3;
    }

    static assert(isSomeString!dstring);

    alias Resolver = DefaultResolver!Test;
    static assert(is(Resolver!"flag" == CategoricalAggregator!bool));
    static assert(is(Resolver!"state" == CategoricalAggregator!State));
    static assert(is(Resolver!"n1" == NumericAggregator!byte));
    static assert(is(Resolver!"n2" == NumericAggregator!ubyte));
    static assert(is(Resolver!"n3" == NumericAggregator!int));
    static assert(is(Resolver!"n4" == NumericAggregator!long));
    static assert(is(Resolver!"f1" == NumericAggregator!float));
    static assert(is(Resolver!"f2" == NumericAggregator!double));
    static assert(is(Resolver!"f3" == NumericAggregator!real));
    static assert(is(Resolver!"d" == DurationAggregator!Duration));
    static assert(is(Resolver!"t1" == SeriesAggregator!SysTime));
    static assert(is(Resolver!"t2" == SeriesAggregator!DateTime));
    static assert(is(Resolver!"t3" == SeriesAggregator!Date));
    static assert(is(Resolver!"t4" == SeriesAggregator!TimeOfDay));
    static assert(is(Resolver!"s1" == CategoricalAggregator!string));
    static assert(is(Resolver!"s2" == CategoricalAggregator!wstring));
    static assert(is(Resolver!"s3" == CategoricalAggregator!dstring));
}

/// Holder for a numeric aggregator; `toString` is currently a placeholder.
struct DescribeResult
{
    NumericAggregator!double result;

    /// Returns an empty string.
    string toString() const
    {
        return "";
    }
}