|
//____________________________________________________________________
//
// Class for low-level statistics
//
//
//
/*
Given input tuples of the form
this class will calculate the sample covariance
and the average (or sample mean) values
As well as determind the minimum
and maximum
of each of
variables
The algorithms used are the same as in TPrincipal
(see the TPrincipal::AddRow
method), as to minimise rounding errors.
can be as large as the underlying OS
allows (the class contains a
matrix plus 3 vectors). The class
uses double precision variables for optimal precision.
This class is usefull to obtains various statistical moments of a
data sample. Furhter, the class is persistent. As far as possible,
the class methods are inline to improve speed.
*/
//
// $Id: BrStatistics.cxx,v 1.7 2002/08/30 20:35:04 videbaek Exp $
//
#include "BrStatistics.h"
#include <TString.h>
#include <TMath.h>
#include <TH1.h>
#include <TH2.h>
#include <TH3.h>
#include <ctype.h>
#include <stdlib.h>
#include <iostream.h>
#include <iomanip.h>
ClassImp(BrStatistics); // Low-level statistics class
//____________________________________________________________________
BrStatistics::BrStatistics()
{
// Default CTOR, do not use
Set(0, kFALSE, 0);
}
//____________________________________________________________________
BrStatistics::BrStatistics(Int_t n, Bool_t storeData, Int_t m)
{
// Create a low-level statistics object for n variables, with
// initial room for m rows. n must be bigger than 1. For one
// variable statistics, use the static AddPoint method for
// high-precision statistics.
if (n <= 1) {
Error("BrStatistics",
"at least 2 variables must be used");
MakeZombie();
return;
}
Set(n, storeData, m);
}
//____________________________________________________________________
BrStatistics::~BrStatistics()
{
// Destructor
}
//____________________________________________________________________
void BrStatistics::CombineSamples(UInt_t n1, Double_t m1, Double_t s1,
UInt_t n2, Double_t m2, Double_t s2,
Double_t& m, Double_t& s)
{
// Static method.
// Given the two partial sample of the same stocastic variable, of
// size n1 and n2, with averages m1 and m2, and square variances,
// s1 and s2, calculate the full sample average and square variance,
// and return them in m and s. The formulas used are:
//
// n1 * m1 + n2 * m2
// m = -----------------
// n1 + n2
//
// n1 * s1 + n2 * s2 n1 * n2 * (m1 - m2)^2
// s = ----------------- + ---------------------
// n1 + n2 (n1 + n2)^2
//
if (n1 + n2 <= 0)
return;
m = (n1 * m1 + n2 * m2) / (n1 + n2);
s = (n1 * s1 + n2 * s2) / (n1 + n2)
+ (m1 - m2) * (m1 - m2) * n1 * n2 / (n1 + n2) / (n1 + n2);
}
//____________________________________________________________________
void BrStatistics::AddPoint(Double_t data, UInt_t n,
Double_t& average,
Double_t& sqvar)
{
// Static method to add a point to a 1D statistics. The arguments
// are:
//
// data the n'th data point
// n number of data point (must be in [1,M])
// average on input this must be the average after n-1 data
// points. On output this contains the new average
// sqvar on input this must be the sqyare variance after n-1
// data points. On output this contains the new square
// variance
//
// The algorithm is the one used in TPrincipal::AddRow.
//
if (n < 0)
return;
if (n == 1) {
average = data;
sqvar = 0;
return;
}
Double_t cor = 1. - 1. / n;
average *= cor;
average += data / n;
sqvar *= cor;
sqvar += TMath::Power(data - average, 2) / (n - 1);
}
//____________________________________________________________________
void BrStatistics::AddRow(Double_t* row)
{
// Add one row (observation) to the statisical sample. Values are
// stored internally. Average, Max, Min, and the covariance matrix
// is calculated on the fly (It actually improves accuary). See
// also TPrincipal::AddRow.
if (IsZombie()) {
Error("AddRow", "this object is a zombie");
return;
}
// Increment sample counter
fM++;
// If the data is stored, then we reallocate memory if needed.
if (fDataStored && fM >= fMmem) {
// We need to reallocate memory
fMmem = Int_t(1.5 * fMmem);
fData.Set(fN * fMmem);
}
Int_t i,j;
Double_t cor = 1. - 1. / fM;
for (i = 0; i < fN; i++) {
if (fDataStored)
// Assinging to internal store
fData[fN * (fM - 1) + i] = row[i];
// For the first data point, we only need to set the max, min, and
// average to the tuple data. The covariance matrix is null
// still.
if (fM == 1) {
fAverage[i] = row[i];
fMax[i] = row[i];
fMin[i] = row[i];
continue;
}
// Determin the max/min on the fly
if (fMax[i] <= row[i]) fMax[i] = row[i];
if (fMin[i] >= row[i]) fMin[i] = row[i];
// Calclualte the average on the fly
fAverage[i] *= cor;
fAverage[i] += row[i] / fM;
// Calculate the covariance matrix on the fly
Double_t t1 = (row[i] - fAverage[i]) / (fM - 1);
for (j = 0; j <= i; j++) {
fCovariance[i * fN + j] *= cor;
fCovariance[i * fN + j] += t1 * (row[j] - fAverage[j]);
fCovariance[j * fN + i] = fCovariance[i * fN + j];
} // for (j = 0; j <= i; j++)
} // for (i = 0; i < fN; i++)
}
//____________________________________________________________________
void BrStatistics::Draw(Option_t* option)
{
// Draw variables as given by option. The form of the option is
// <option> ::= <var list><option list>
// where
// <var list> ::= <quantity>
// | <var number>
// | <var number>:<var number>
// | <var number>:<var number>:<var number>
//
// <option list> ::=
// | ::<histogram draw option>
//
// <quantity> ::= one of A, S, C, L, U
//
// where <histogram draw option> is any valid histogram option for
// the type of histogram drawn (1D, 2D, or 3D). See TH1::Draw.
//
// The <quantity>'s corresponds to
//
// A Average in each variable (1D)
// S Spread in each varaible (1D)
// C Covariance matrix (2D)
// L Mimimum in each varaible (1D)
// U Maximum in each variable (1D)
// F Spread^2 / average in each variable (1D)
// G Spread^2 / average^2 in each variable (1D)
// W Covariance / sqrt(average * average) in each cell (2D)
// X Covariance / (average * average) in each cell (2D)
// K Correlation matrix (2D)
// V Alias for S
// M Alias for A
if (IsZombie()) {
Error("Draw", "this object is a zombie");
return;
}
TH1* h = 0;
TString opt(option);
Int_t old = 0;
Int_t i = 0;
if (opt[0] == 'S' || opt[0] == 's'
|| opt[0] == 'V' || opt[0] == 'v') {
// Draw the variance
h = new TH1D("variance", "Variance in each variable",
fN, -.5, fN-.5);
for (i = 0; i < fN; i++) {
Double_t y = GetVariance(i);
h->SetBinContent(i+1, TMath::Sqrt(y));
h->SetBinError(i+1, TMath::Sqrt(4 * y * y / fM));
}
old = 1;
}
else if (opt[0] == 'A' || opt[0] == 'a'
|| opt[0] == 'M' || opt[0] == 'm') {
// Draw the mean
h = new TH1D("average", "Average in each variable",
fN, -.5, fN-.5);
for (i = 0; i < fN; i++) {
h->SetBinContent(i+1, GetAverage(i));
h->SetBinError(i+1, TMath::Sqrt(GetVariance(i)));
}
old = 1;
}
else if (opt[0] == 'F' || opt[0] == 'f' ) {
// Draw the fluctuation
h = new TH1D("fluctuation", "Fluctuation in each variable",
fN, -.5, fN-.5);
for (i = 0; i < fN; i++) {
Double_t y = GetFluctuation(i,i);
h->SetBinContent(i+1, y);
h->SetBinError(i+1, 4 * y * y / fM);
}
old = 1;
}
else if (opt[0] == 'G' || opt[0] == 'g' ) {
// Draw the fluctuation
h = new TH1D("fluctuation2", "Fluctuation2 in each variable",
fN, -.5, fN-.5);
for (i = 0; i < fN; i++) {
Double_t y = GetFluctuation(i,i) / GetAverage(i);
h->SetBinContent(i+1, y);
}
old = 1;
}
else if (opt[0] == 'L' || opt[0] == 'l') {
// Draw the minimums
h = new TH1D("minimum", "Minimum in each variable",
fN, -.5, fN-.5);
for (i = 0; i < fN; i++)
h->SetBinContent(i+1, GetMin(i));
}
else if (opt[0] == 'U' || opt[0] == 'u') {
// Draw the maximums
h = new TH1D("maximum", "Maximum in each variable",
fN, -.5, fN-.5);
for (i = 0; i < fN; i++)
h->SetBinContent(i+1, GetMax(i));
old = 1;
}
else if (opt[0] == 'C' || opt[0] == 'c') {
// Draw the covariance matrix
h = new TH2D("covariance", "Covariance Matrix",
fN, -.5, fN-.5, fN, -.5, fN-.5);
for (i = 0; i < fN; i++) {
for (Int_t j = 0; j < fN; j++)
h->SetBinContent(i+1, j+1, GetCovariance(i, j));
}
old = 1;
}
else if (opt[0] == 'K' || opt[0] == 'k') {
// Draw the covariance matrix
h = new TH2D("correlation", "Correlation Matrix",
fN, -.5, fN-.5, fN, -.5, fN-.5);
for (i = 0; i < fN; i++) {
for (Int_t j = 0; j < fN; j++)
h->SetBinContent(i+1, j+1, GetCorrelation(i, j));
}
old = 1;
}
else if (opt[0] == 'W' || opt[0] == 'w') {
// Draw the covariance matrix
h = new TH2D("omega", "Fluctuation Matrix",
fN, -.5, fN-.5, fN, -.5, fN-.5);
for (i = 0; i < fN; i++) {
for (Int_t j = 0; j < fN; j++)
h->SetBinContent(i+1, j+1, GetFluctuation(i, j));
}
old = 1;
}
else if (opt[0] == 'X' || opt[0] == 'x') {
// Draw the covariance matrix
h = new TH2D("omega2", "Fluctuation2 Matrix",
fN, -.5, fN-.5, fN, -.5, fN-.5);
for (i = 0; i < fN; i++) {
for (Int_t j = 0; j < fN; j++)
h->SetBinContent(i+1, j+1, GetCovariance(i, j) /
GetAverage(i) / GetAverage(j));
}
old = 1;
}
else {
if (!fDataStored) {
Warning("Draw", "data isn't stored, so I cannot draw it");
return;
}
Int_t vars[3] = { -1, -1, -1};
Int_t nvars = 0;
Int_t l = opt.Length();
// Primitive parsing of the options string.
while (nvars < 3 && i < l) {
while (isdigit(opt(i++)))
if (opt(i) == ':' || opt(i) == '0') {
vars[nvars++] = strtol(opt(old,i - old).Data(),NULL,0);
old = ++i;
break;
}
}
// Draw the histograms. A maximum of 3 variables will be drawn.
switch (nvars) {
case 1:
if (vars[0] >= fN) {
Error("Draw", "invalid variable number: %d", vars[0]);
return;
}
h = Project(vars[0]);
break;
case 2:
if (vars[0] >= fN || vars[1] >= fN) {
Error("Draw", "invalid variable number(s): %d %d",
vars[0], vars[1]);
return;
}
h = Project(vars[0], vars[1]);
break;
case 3:
if (vars[0] >= fN || vars[1] >= fN || vars[2] >= fN) {
Error("Draw", "invalid variable number(s): %d %d %d",
vars[0], vars[1], vars[2]);
return;
}
h = Project(vars[0],vars[1],vars[2]);
break;
default:
Error("Draw", "No variables specified");
}
}
if (h)
h->Draw(opt(old,opt.Length()).Data());
}
//____________________________________________________________________
Double_t* BrStatistics::GetRow(Int_t i) const
{
// Get a pointer into internal data, starting at row i
if (IsZombie()) {
Error("GetRow", "this object is a zombie");
return 0;
}
if (!fDataStored) {
Warning("GetRow", "data isn't stored, so I cannot return it");
return 0;
}
if (i > fM)
return 0;
return &fData.fArray[i * fN];
}
//____________________________________________________________________
TH1* BrStatistics::Project(Int_t i)
{
// Project variable i into a 1D histogram (TH1D). User
// need to store the histogram immediately
if (IsZombie()) {
Error("Project", "this object is a zombie");
return 0;
}
if (!fDataStored) {
Warning("Project", "data isn't stored, so I cannot project it");
return 0;
}
if (i >= fN) {
Warning("Project", "index %d out of range [0,%d)", i, fN);
return 0;
}
Int_t bins = (fM < 100 ? 10 : fM / 10);
TH1D* hist = new TH1D(Form("x_%02d",i),
Form("Variable %d", i),
bins, fMin[i], fMax[i]);
Int_t j;
for (j = 0; j < fM; j++)
hist->Fill(fData[j * fN + i]);
return hist;
}
//____________________________________________________________________
TH1* BrStatistics::Project(Int_t i, Int_t j)
{
// Project variables i and j into a 2D histogram (TH2D). User need
// to cast the returned histogram to a TH2D pointer. User need to
// store the histogram immediately
if (IsZombie()) {
Error("Project", "this object is a zombie");
return 0;
}
if (!fDataStored) {
Warning("Project", "data isn't stored, so I cannot project it");
return 0;
}
if (fN < 2) {
Warning("Project", "less then 2 variables in tuples, no projection");
return 0;
}
if (i >= fN || j >= fN) {
Warning("Project",
"indicies (%d,%d) out of range ([0,%d),[0,%d))",
i, j, fN, fN);
return 0;
}
Int_t bins = (fM < 100 ? 10 : fM / 10);
TH2D* hist = new TH2D(Form("x_%02d_%02d", i, j),
Form("Variable %d, %d", i, j),
bins, fMin[i], fMax[i],
bins, fMin[j], fMax[j]);
Int_t k;
for (k = 0; k < fM; k++)
hist->Fill(fData[k * fN + i], fData[k * fN + j]);
return hist;
}
//____________________________________________________________________
TH1* BrStatistics::Project(Int_t i, Int_t j, Int_t k)
{
// Project variables i, j and k into a 3D histogram (TH3D). User
// need to store the histogram immediately
if (IsZombie()) {
Error("Project", "this object is a zombie");
return 0;
}
if (!fDataStored) {
Warning("Project", "data isn't stored, so I cannot project it");
return 0;
}
if (fN < 3) {
Warning("Project", "less then 3 variables in tuples, no projection");
return 0;
}
if (i >= fN || j >= fN || k >= fN) {
Warning("Project",
"indicies (%d,%d,%d) out of range ([0,%d),[0,%d),[0,%d))",
i, j, k, fN, fN, fN);
return 0;
}
Int_t bins = (fM < 100 ? 10 : fM / 10);
TH3D* hist = new TH3D(Form("x_%02d_%02d_%02d",i,j,k),
Form("Variables %d, %d, %d", i, j, k),
bins, fMin[i], fMax[i],
bins, fMin[j], fMax[j],
bins, fMin[k], fMax[k]);
Int_t l;
for (l = 0; l < fM; l++)
hist->Fill(fData[k * fN + i], fData[k * fN + j]);
return hist;
}
//____________________________________________________________________
void BrStatistics::Print(Option_t* option) const
{
// Print the statistics
// Options:
// B Basic information
// A Average (a.k.a. sample mean)
// L Minimum
// U Maximum
// S Spread
// C Covariance matrix
// F Fluctuation matrix
// K Kovariance matrix
// D Data
// Default is BALUS
if (IsZombie())
Warning("Print", "this object is a zombie");
TString opt(option);
opt.ToLower();
Int_t i, j;
Bool_t showAverage = kFALSE;
Bool_t showSpread = kFALSE;
Bool_t showMin = kFALSE;
Bool_t showMax = kFALSE;
Bool_t showBasic = kFALSE;
Bool_t showCovar = kFALSE;
Bool_t showKorel = kFALSE;
Bool_t showFluct = kFALSE;
Bool_t showData = kFALSE;
Int_t nCols = 0;
for (i = 0; i < opt.Length(); i++) {
switch(opt(i)) {
case 'a': showAverage = kTRUE; nCols++; break;
case 's': showSpread = kTRUE; nCols++; break;
case 'l': showMin = kTRUE; nCols++; break;
case 'u': showMax = kTRUE; nCols++; break;
case 'b': showBasic = kTRUE; break;
case 'c': showCovar = kTRUE; break;
case 'k': showKorel = kTRUE; break;
case 'f': showFluct = kTRUE; break;
case 'd': showData = kTRUE; break;
}
}
if (opt.Contains("b"))
cout << "# variables: " << setw(6) << fN << endl
<< "# of rows: " << setw(6) << fM << endl;
if (nCols > 0) {
// Print headers
cout << "Variable "
<< (showAverage ? "| Average " : "")
<< (showMax ? "| Max " : "")
<< (showMin ? "| Min " : "")
<< (showSpread ? "| Spread " : "") << endl
<< "---------" << flush;
for (i = 0; i < nCols; i++)
cout << "+------------" << flush;
cout << endl;
for (i = 0; i < fN; i++) {
cout << setw(8) << i << " | " << flush;
if (showAverage)
cout << setw(10) << Float_t(GetAverage(i)) << " | " << flush;
if (showMax)
cout << setw(10) << Float_t(GetMax(i)) << " | " << flush;
if (showMin)
cout << setw(10) << Float_t(GetMin(i)) << " | " << flush;
if (showSpread)
cout << setw(10)
<< Float_t(TMath::Sqrt(GetCovariance(i, i)))
<< flush;
cout << endl;
}
}
if (showCovar) {
cout << "Covariance Matrix:" << endl;
cout << " |" << flush;
for (i = 0; i < fN; i++)
cout << setw(7) << i << " " << flush;
cout << endl << "-----+" << flush;
for (i = 0; i < fN; i++)
cout << "---------" << flush;
cout << endl;
for (i = 0; i < fN; i++) {
cout << setw(4) << i << " |" << flush;
for (j = 0; j <= i; j++) {
cout << setw(8) << setprecision(2) << GetCovariance(i, j)
<< " " << flush;
}
cout << endl;
}
}
if (showKorel) {
cout << "Correlation Matrix:" << endl;
cout << " |" << flush;
for (i = 0; i < fN; i++)
cout << setw(7) << i << " " << flush;
cout << endl << "-----+" << flush;
for (i = 0; i < fN; i++)
cout << "---------" << flush;
cout << endl;
for (i = 0; i < fN; i++) {
cout << setw(4) << i << " |" << flush;
for (j = 0; j <= i; j++) {
cout << setw(8) << setprecision(2) << GetCorrelation(i, j)
<< " " << flush;
}
cout << endl;
}
}
if (showFluct) {
cout << "Fluctuation Matrix:" << endl;
cout << " |" << flush;
for (i = 0; i < fN; i++)
cout << setw(7) << i << " " << flush;
cout << endl << "-----+" << flush;
for (i = 0; i < fN; i++)
cout << "---------" << flush;
cout << endl;
for (i = 0; i < fN; i++) {
cout << setw(4) << i << " |" << flush;
for (j = 0; j <= i; j++) {
cout << setw(8) << setprecision(2) << GetFluctuation(i, j)
<< " " << flush;
}
cout << endl;
}
}
if (showData) {
if (!fDataStored) {
Warning("Print", "data isn't stored, so I cannot print it");
return;
}
cout << "Row/Var " << flush;
for (i = 0; i < fN; i++)
cout << "| " << setw(8) << i << " " << flush;
cout << endl << "---------" << flush;
for (i = 0; i < fN; i++)
cout << "+----------" << flush;
cout << endl;
for (i = 0; i < fM; i++) {
cout << setw(8) << i << " " << flush;
for (j = 0; j < fN; j++)
cout << "| " << setw(8) << Float_t(fData.At(i * fN + j)) << " "
<< flush;
cout << endl;
}
}
}
//____________________________________________________________________
void BrStatistics::Set(Int_t n, Bool_t store, Int_t m)
{
// Set the dimension and initial size of the sample storage, if
// store is true. All arrays are reset
fN = n;
fDataStored = store;
fMmem = (store ? m : 0);
fAverage.Set(fN); fAverage.Reset();
fMax.Set(fN); fMax.Reset();
fMin.Set(fN); fMin.Reset();
fCovariance.Set(fN*fN); fCovariance.Reset();
fData.Set(store ? fN*fMmem : 0); fData.Reset();
}
//
// EOF
//
|