Commit a6aa5b0f authored by unknown's avatar unknown
Browse files

WL#3310 Update ndb_size.pl for 5.1

the "If I had a hammer" patch.

Had to answer the question, did it in code.

Much more accurate results for 5.1


ndb/tools/ndb_size.pl:
  Add experimental 5.1-dd support to ndb_size.pl
  Improve calculations with 5.1 and variable sized attributes.
  
  Now much more closely resembles reality.
ndb/tools/ndb_size.tmpl:
  Comment on 5.1-dd exp support.
  
  Display:
  - number of varsized attributes
  - actual row overhead depending on version
  - overhead for varsized attributes
parent 81f1acc4
Loading
Loading
Loading
Loading
+50 −30
Original line number Diff line number Diff line
@@ -57,7 +57,7 @@ if(@ARGV < 3 || $ARGV[0] eq '--usage' || $ARGV[0] eq '--help')
    $template->param(dsn => $dsn);
}

my @releases = ({rel=>'4.1'},{rel=>'5.0'},{rel=>'5.1'});
my @releases = ({rel=>'4.1'},{rel=>'5.0'},{rel=>'5.1'},{rel=>'5.1-dd'});
$template->param(releases => \@releases);

my $tables  = $dbh->selectall_arrayref("show tables");
@@ -91,12 +91,14 @@ foreach(@{$tables})

    # We now work out the DataMemory usage
    
    # sizes for   4.1, 5.0, 5.1
    my @totalsize= (0,0,0);
    # sizes for   4.1, 5.0, 5.1 and 5.1-dd
    my @totalsize= (0,0,0,0);
    my $nrvarsize= 0;

    foreach(keys %$info)
    {
	my @realsize = (0,0,0);
	my @realsize = (0,0,0,0);
	my @varsize  = (0,0,0,0);
	my $type;
	my $size;
	my $name= $_;
@@ -112,53 +114,56 @@ foreach(@{$tables})
	}

	if($type =~ /tinyint/)
	{@realsize=(1,1,1)}
	{@realsize=(1,1,1,1)}
	elsif($type =~ /smallint/)
	{@realsize=(2,2,2)}
	{@realsize=(2,2,2,2)}
	elsif($type =~ /mediumint/)
	{@realsize=(3,3,3)}
	{@realsize=(3,3,3,3)}
	elsif($type =~ /bigint/)
	{@realsize=(8,8,8)}
	{@realsize=(8,8,8,8)}
	elsif($type =~ /int/)
	{@realsize=(4,4,4)}
	{@realsize=(4,4,4,4)}
	elsif($type =~ /float/)
	{
	    if($size<=24)
	    {@realsize=(4,4,4)}
	    {@realsize=(4,4,4,4)}
	    else
	    {@realsize=(8,8,8)}
	    {@realsize=(8,8,8,8)}
	}
	elsif($type =~ /double/ || $type =~ /real/)
	{@realsize=(8,8,8)}
	{@realsize=(8,8,8,8)}
	elsif($type =~ /bit/)
	{
	    my $a=($size+7)/8;
	    @realsize = ($a,$a,$a);
	    @realsize = ($a,$a,$a,$a);
	}
	elsif($type =~ /datetime/)
	{@realsize=(8,8,8)}
	{@realsize=(8,8,8,8)}
	elsif($type =~ /timestamp/)
	{@realsize=(4,4,4)}
	{@realsize=(4,4,4,4)}
	elsif($type =~ /date/ || $type =~ /time/)
	{@realsize=(3,3,3)}
	{@realsize=(3,3,3,3)}
	elsif($type =~ /year/)
	{@realsize=(1,1,1)}
	{@realsize=(1,1,1,1)}
	elsif($type =~ /varchar/ || $type =~ /varbinary/)
	{
	    my $fixed= 1+$size;
	    my $fixed=$size+ceil($size/256);
	    my @dynamic=$dbh->selectrow_array("select avg(length(`"
					      .$name
					      ."`)) from `".$table.'`');
	    $dynamic[0]=0 if !$dynamic[0];
	    @realsize= ($fixed,$fixed,ceil($dynamic[0]));
	    $dynamic[0]+=ceil($dynamic[0]/256); # size bit
	    $nrvarsize++;
	    $varsize[3]= ceil($dynamic[0]);
	    @realsize= ($fixed,$fixed,ceil($dynamic[0]),$fixed);
	}
	elsif($type =~ /binary/ || $type =~ /char/)
	{@realsize=($size,$size,$size)}
	{@realsize=($size,$size,$size,$size)}
	elsif($type =~ /text/ || $type =~ /blob/)
	{
	    @realsize=(256,256,1);
	    @realsize=(256,256,256,256);
	    $NoOfTables[$_]{val} += 1 foreach 0..$#releases; # blob uses table
	} # FIXME check if 5.1 is correct
	}

	@realsize= align(4,@realsize);

@@ -212,20 +217,20 @@ foreach(@{$tables})
	    type=>'bigint',
	    size=>8,
	    key=>'PRI',
	    datamemory=>[{val=>8},{val=>8},{val=>8}],
	    datamemory=>[{val=>8},{val=>8},{val=>8},{val=>8}],
	};
	$columnsize{'HIDDEN_NDB_PKEY'}= [8,8,8];
    }

    my @IndexDataMemory= ({val=>0},{val=>0},{val=>0});
    my @RowIndexMemory= ({val=>0},{val=>0},{val=>0});
    my @IndexDataMemory= ({val=>0},{val=>0},{val=>0},{val=>0});
    my @RowIndexMemory= ({val=>0},{val=>0},{val=>0},{val=>0});

    my @indexes;
    foreach my $index (keys %indexes) {
	my $im41= 25;
	$im41+=$columnsize{$_}[0] foreach @{$indexes{$index}{columns}};
	my @im = ({val=>$im41},{val=>25},{val=>25});
	my @dm = ({val=>10},{val=>10},{val=>10});
	my @im = ({val=>$im41},{val=>25},{val=>25},{val=>25});
	my @dm = ({val=>10},{val=>10},{val=>10},{val=>10});
	push @indexes, {
	    name=>$index,
	    type=>$indexes{$index}{type},
@@ -233,13 +238,22 @@ foreach(@{$tables})
	    indexmemory=>\@im,
	    datamemory=>\@dm,
	};
	$IndexDataMemory[$_]{val}+=$dm[$_]{val} foreach 0..2;
	$RowIndexMemory[$_]{val}+=$im[$_]{val} foreach 0..2;
	$IndexDataMemory[$_]{val}+=$dm[$_]{val} foreach 0..$#releases;
	$RowIndexMemory[$_]{val}+=$im[$_]{val} foreach 0..$#releases;
    }

    # total size + 16 bytes overhead
    my @TotalDataMemory;
    $TotalDataMemory[$_]{val}=$IndexDataMemory[$_]{val}+$totalsize[$_]+16 foreach 0..2;
    my @RowOverhead = ({val=>16},{val=>16},{val=>16},{val=>24});
    # 5.1 has ptr to varsize page, and per-varsize overhead
    my @nrvarsize_mem= ({val=>0},{val=>0},
			{val=>8},{val=>0});
    {
	my @a= align(4,$nrvarsize*2);
	$nrvarsize_mem[2]{val}+=$a[0]+$nrvarsize*4;
    }

    $TotalDataMemory[$_]{val}=$IndexDataMemory[$_]{val}+$totalsize[$_]+$RowOverhead[$_]{val}+$nrvarsize_mem[$_]{val} foreach 0..$#releases;

    my @RowDataMemory;
    push @RowDataMemory,{val=>$_} foreach @totalsize;
@@ -260,12 +274,18 @@ foreach(@{$tables})
    my @counts;
    $counts[$_]{val}= $count foreach 0..$#releases;

    my @nrvarsize_rel= ({val=>0},{val=>0},
			{val=>$nrvarsize},{val=>0});

    push @table_size, {
	table=>$table,
	indexes=>\@indexes,
	columns=>\@columns,
	count=>\@counts,
	RowOverhead=>\@RowOverhead,
	RowDataMemory=>\@RowDataMemory,
	nrvarsize=>\@nrvarsize_rel,
	nrvarsize_mem=>\@nrvarsize_mem,
	releases=>\@releases,
	IndexDataMemory=>\@IndexDataMemory,
	TotalDataMemory=>\@TotalDataMemory,
+16 −2
Original line number Diff line number Diff line
@@ -15,6 +15,8 @@ td,th { border: 1px solid black }

<p>This information should be valid for MySQL 4.1 and 5.0. Since 5.1 is not a final release yet, the numbers should be used as a guide only.</p>

<p>5.1-dd is for tables stored on disk. The ndb_size.pl estimates are <b>experimental</b> and should not be trusted. Notably we don't take into account indexed columns being in DataMemory versus non-indexed on disk.</p>

<h2>Parameter Settings</h2>
<p><b>NOTE</b> the configuration parameters below do not take into account system tables and other requirements.</p>
<table>
@@ -128,10 +130,22 @@ td,th { border: 1px solid black }
  <th><TMPL_VAR NAME=rel></th>
 </TMPL_LOOP>
</tr>
<tr>
 <th>Nr Varsized Attributes</th>
 <TMPL_LOOP NAME=nrvarsize>
  <td><TMPL_VAR NAME=val></td>
 </TMPL_LOOP>
</tr>
<tr>
 <th>Row Overhead</th>
 <TMPL_LOOP NAME=releases>
  <td>16</td>
 <TMPL_LOOP NAME=RowOverhead>
  <td><TMPL_VAR NAME=val></td>
 </TMPL_LOOP>
</tr>
<tr>
 <th>Varsized Overhead</th>
 <TMPL_LOOP NAME=nrvarsize_mem>
  <td><TMPL_VAR NAME=val></td>
 </TMPL_LOOP>
</tr>
<tr>