1
0

Improve refresh-egg performance by re-using existing commits for new branches

This commit is contained in:
Mike Schwörer 2022-10-17 19:57:11 +02:00
parent 18af582343
commit 1084bd36a6
Signed by: Mikescher
GPG Key ID: D3C7172E0A70F8CF
6 changed files with 140 additions and 47 deletions

19
.idea/sqlDataSources.xml generated Normal file
View File

@ -0,0 +1,19 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="SqlDataSourceStorage">
<option name="dataSources">
<list>
<State>
<option name="id" value="b1f51636-21bf-4ca9-a884-b0419489ab5f" />
<option name="name" value="EGG DB" />
<option name="languageId" value="SQLite" />
<option name="urls">
<array>
<option value="file://$PROJECT_DIR$/www/extern/egg/db_init.sql" />
</array>
</option>
</State>
</list>
</option>
</component>
</project>

5
.idea/sqldialects.xml generated
View File

@ -1,6 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<project version="4"> <project version="4">
<component name="SqlDialectMappings"> <component name="SqlDialectMappings">
<file url="file://$PROJECT_DIR$" dialect="MySQL" />
<file url="file://$PROJECT_DIR$/www/extern/egg" dialect="SQLite" />
<file url="PROJECT" dialect="MySQL" /> <file url="PROJECT" dialect="MySQL" />
</component> </component>
<component name="SqlResolveMappings">
<file url="file://$PROJECT_DIR$/www/extern/egg" scope="{&quot;node&quot;:{ &quot;@negative&quot;:&quot;1&quot;, &quot;group&quot;:{ &quot;@kind&quot;:&quot;root&quot;, &quot;node&quot;:{ &quot;name&quot;:{ &quot;@qname&quot;:&quot;b1f51636-21bf-4ca9-a884-b0419489ab5f&quot; }, &quot;group&quot;:{ &quot;@kind&quot;:&quot;schema&quot;, &quot;node&quot;:{ &quot;name&quot;:{ &quot;@qname&quot;:&quot;&quot; } } } } } }}" />
</component>
</project> </project>

View File

@ -355,7 +355,7 @@ class EGGDatabase
public function deleteDanglingCommitdata(string $name) public function deleteDanglingCommitdata(string $name)
{ {
$hashes = $this->sql_query_assoc_prep("SELECT metadata.hash FROM metadata LEFT JOIN commits ON metadata.hash = commits.hash WHERE commits.hash IS NULL", []); $hashes = $this->sql_query_assoc_prep("SELECT metadata.hash AS mdh FROM metadata LEFT JOIN commits ON metadata.hash = commits.hash WHERE commits.hash IS NULL", []);
if (count($hashes) === 0) return; if (count($hashes) === 0) return;
@ -363,7 +363,7 @@ class EGGDatabase
$this->beginTransaction(); $this->beginTransaction();
foreach ($hashes as $hash) { foreach ($hashes as $hash) {
$this->sql_query_assoc_prep("DELETE FROM metadata WHERE hash = :hash", [ [":hash", $hash, PDO::PARAM_STR] ]); $this->sql_query_assoc_prep("DELETE FROM metadata WHERE hash = :hash", [ [":hash", $hash['mdh'], PDO::PARAM_STR] ]);
} }
$this->commitTransaction(); $this->commitTransaction();
@ -475,9 +475,13 @@ class EGGDatabase
/** /**
* @return Commit[] * @return Commit[]
*/ */
public function getCommits(Branch $branch): array public function getCommitsForBranch(Branch $branch): array
{ {
$rows = $this->sql_query_assoc("SELECT metadata.*, commits.id AS commitid FROM commits LEFT JOIN metadata WHERE commits.branch_id = :bid", [[":bid", $branch->ID, PDO::PARAM_INT]]); $rows = $this->sql_query_assoc_prep("SELECT metadata.*, commits.id AS commitid FROM commits LEFT JOIN metadata ON metadata.hash = commits.hash WHERE commits.branch_id = :bid",
[
[":bid", $branch->ID, PDO::PARAM_INT]
]);
$r = []; $r = [];
foreach ($rows as $row) foreach ($rows as $row)
{ {
@ -491,7 +495,35 @@ class EGGDatabase
$c->CommitterEmail = $row['committer_email']; $c->CommitterEmail = $row['committer_email'];
$c->Message = $row['message']; $c->Message = $row['message'];
$c->Date = $row['date']; $c->Date = $row['date'];
$c->Parents = $row['parent_commits']; $c->Parents = array_filter(explode(';', $row['parent_commits']), fn($p) => $p !== '');
$r []= $c;
}
return $r;
}
/**
 * Loads the distinct commit metadata of every commit stored for any branch of $repo.
 *
 * Each returned Commit gets its Branch set to $branchValue (the branch passed in by
 * the caller), not the branch the commit row is stored under.
 *
 * Commits that have no matching metadata row are skipped: the LEFT JOIN produces an
 * all-NULL row for them, which cannot be turned into a usable Commit.
 *
 * @param Repository $repo        repository whose branches are searched (matched via branches.repo_id)
 * @param Branch     $branchValue value assigned to Commit::$Branch on every result
 * @return Commit[]
 */
public function getCommitsForRepo(Repository $repo, Branch $branchValue): array
{
    $rows = $this->sql_query_assoc_prep("SELECT DISTINCT metadata.* FROM branches INNER JOIN commits ON branches.id = commits.branch_id LEFT JOIN metadata ON metadata.hash = commits.hash WHERE branches.repo_id = :rid",
    [
        [":rid", $repo->ID, PDO::PARAM_INT]
    ]);

    $r = [];
    foreach ($rows as $row)
    {
        // commit without metadata => every metadata column is NULL, nothing to build
        if ($row['hash'] === null) continue;

        $c = new Commit();
        $c->Branch = $branchValue;
        $c->Hash = $row['hash'];
        $c->AuthorName = $row['author_name'];
        $c->AuthorEmail = $row['author_email'];
        $c->CommitterName = $row['committer_name'];
        $c->CommitterEmail = $row['committer_email'];
        $c->Message = $row['message'];
        $c->Date = $row['date'];
        // parent_commits is a ';'-separated list; drop empty segments and re-index,
        // because array_filter keeps the original keys and would leave gaps
        $c->Parents = array_values(array_filter(explode(';', $row['parent_commits'] ?? ''), fn($p) => $p !== ''));
        $r []= $c;
    }
    return $r;
}

View File

@ -32,10 +32,10 @@ class Branch
/** @var Repository */ /** @var Repository */
public $Repo; public $Repo;
/** @var string */ /** @var string|null */
public $Head; public $Head;
/** @var string */ /** @var string|null */
public $HeadFromAPI = null; public $HeadFromAPI = null;
/** @var string */ /** @var string */

View File

@ -72,15 +72,15 @@ abstract class StandardGitConnection implements IRemoteSource
continue; continue;
} }
$commits = $this->listAndUpdateCommits($db, $repo, $branch); $newcommits = $this->listAndUpdateCommits($db, $repo, $branch);
$db->setUpdateDateOnBranch($branch); $db->setUpdateDateOnBranch($branch);
if (count($commits) === 0) if (count($newcommits) === 0)
{ {
$this->logger->proclog("Branch: [" . $this->name . "|" . $repo->Name . "|" . $branch->Name . "] has no new commits"); $this->logger->proclog("Branch: [" . $this->name . "|" . $repo->Name . "|" . $branch->Name . "] has no new commits");
continue; continue;
} }
$this->logger->proclog("Found " . count($commits) . " new commits in Branch: [" . $this->name . "|" . $repo->Name . "|" . $branch->Name . "]"); $this->logger->proclog("Found " . count($newcommits) . " new commits in Branch: [" . $this->name . "|" . $repo->Name . "|" . $branch->Name . "]");
$repo_changed = true; $repo_changed = true;
$db->setChangeDateOnBranch($branch); $db->setChangeDateOnBranch($branch);
@ -269,34 +269,36 @@ abstract class StandardGitConnection implements IRemoteSource
$targetFound = false; $targetFound = false;
$next_sha = [ $branch->HeadFromAPI ]; $next_sha = [ $branch->HeadFromAPI ];
$visited = array_map(function(Commit $m):string{return $m->Hash;}, $db->getCommits($branch)); $visited = array_map(function(Commit $m):string{return $m->Hash;}, $db->getCommitsForBranch($branch));
$this->logger->proclog("Query commit for [" . $this->name . "|" . $repo->Name . "|" . $branch->Name . "] (initial @ {" . substr($next_sha[0], 0, 8) . "})"); $query_counter=0;
$json = $this->queryCommits($repo->Name, $branch->Name, $next_sha[0]); $existing = [];
$reusedFromExisting = 0;
if ($branch->Head === null) {
for ($pg=2;;$pg++) // new branch, perhaps we can mix'n'match existing commits+metadata
$this->logger->proclog("Query existing commits for [" . $this->name . "|" . $repo->Name . "] (potentially reuse for new branch '" . $branch->Name . "')");
foreach ($db->getCommitsForRepo($repo, $branch) as $c) $existing[$c->Hash] = $c;
}
$query_counter++;
$this->logger->proclog("Query commits for [" . $this->name . "|" . $repo->Name . "|" . $branch->Name . "] (initial @ {" . substr($next_sha[0], 0, 8) . "}) (target: {" . substr($target ?? 'NULL', 0, 8) . "})");
$unprocessed = array_map(fn($p) => $this->createCommit($branch, $p), $this->queryCommits($repo->Name, $branch->Name, $next_sha[0]));
for (;;)
{ {
foreach ($json as $result_commit) foreach ($unprocessed as $commit)
{ {
$jdata = $this->readCommit($result_commit); while (($rmshakey = array_search($commit->Hash, $next_sha)) !== false) unset($next_sha[$rmshakey]);
$sha = $jdata['sha']; if (in_array($commit->Hash, $visited)) continue;
$author_name = $jdata['author_name']; $visited []= $commit->Hash;
$author_email = $jdata['author_email'];
$committer_name = $jdata['committer_name'];
$committer_email = $jdata['committer_email'];
$message = $jdata['message'];
$date = $jdata['date'];
$parents = $jdata['parents']; if ($commit->Hash === $target) $targetFound = true;
if (($rmshakey = array_search($sha, $next_sha)) !== false) unset($next_sha[$rmshakey]);
if (in_array($sha, $visited)) continue;
$visited []= $sha;
if ($sha === $target) $targetFound = true;
if ($targetFound && count($next_sha) === 0) if ($targetFound && count($next_sha) === 0)
{ {
@ -316,20 +318,9 @@ abstract class StandardGitConnection implements IRemoteSource
} }
} }
$commit = new Commit();
$commit->Branch = $branch;
$commit->Hash = $sha;
$commit->AuthorName = $author_name;
$commit->AuthorEmail = $author_email;
$commit->CommitterName = $committer_name;
$commit->CommitterEmail = $committer_email;
$commit->Message = $message;
$commit->Date = $date;
$commit->Parents = $parents;
$newcommits []= $commit; $newcommits []= $commit;
foreach ($parents as $p) foreach ($commit->Parents as $p)
{ {
$next_sha []= $p; $next_sha []= $p;
} }
@ -338,12 +329,29 @@ abstract class StandardGitConnection implements IRemoteSource
$next_sha = array_values($next_sha); // fix numeric keys $next_sha = array_values($next_sha); // fix numeric keys
if (count($next_sha) === 0) break; if (count($next_sha) === 0) break;
$this->logger->proclog("Query commit for [" . $this->name . "|" . $repo->Name . "|" . $branch->Name . "] (" . $pg . " @ {" . substr($next_sha[0], 0, 8) . "})"); if (array_key_exists($next_sha[0], $existing)) {
// fast-track for existing Commits
$unprocessed = [ $existing[$next_sha[0]] ];
$reusedFromExisting++;
} else {
$query_counter++;
$this->logger->proclog("Query commits for [" . $this->name . "|" . $repo->Name . "|" . $branch->Name . "] (" . $query_counter . " @ {" . substr($next_sha[0], 0, 8) . "})");
$unprocessed = array_map(fn($p) => $this->createCommit($branch, $p), $this->queryCommits($repo->Name, $branch->Name, $next_sha[0]));
$json = $this->queryCommits($repo->Name, $branch->Name, $next_sha[0]);
} }
$this->logger->proclog("HEAD pointer in Branch: [" . $this->name . "|" . $repo->Name . "|" . $branch->Name . "] no longer matches. Re-query all " . count($newcommits) . " commits (old HEAD := {".substr($branch->Head ?? 'NULL', 0, 8)."}, missing: [" . join(", ", array_map(function($p){return substr($p ?? 'NULL', 0, 8);}, $next_sha)) . "] )"); }
if ($branch->Head === null) {
$this->logger->proclog("HEAD pointer in new Branch: [" . $this->name . "|" . $repo->Name . "|" . $branch->Name . "] set to ". $branch->HeadFromAPI ." Queried " . count($newcommits) . " commits (reused $reusedFromExisting commits from DB)");
} else {
$this->logger->proclog("HEAD pointer in Branch: [" . $this->name . "|" . $repo->Name . "|" . $branch->Name . "] no longer matches. Re-queried all " . count($newcommits) . " commits (old HEAD := {".substr($branch->Head ?? 'NULL', 0, 8)."}, missing: [" . join(", ", array_map(function($p){return substr($p ?? 'NULL', 0, 8);}, $next_sha)) . "] )");
}
$db->deleteAllCommits($branch); $db->deleteAllCommits($branch);
@ -359,6 +367,35 @@ abstract class StandardGitConnection implements IRemoteSource
return $newcommits; return $newcommits;
} }
/**
 * Converts one raw commit entry from a queryCommits() result into a Commit object.
 *
 * The entry is first normalized by readCommit(); the keys read from its result are
 * sha, author_name, author_email, committer_name, committer_email, message, date
 * and parents.
 *
 * @param Branch $branch        branch the new Commit is attached to
 * @param mixed  $result_commit a single element of the list returned by queryCommits()
 * @return Commit
 */
private function createCommit(Branch $branch, $result_commit): Commit
{
    $fields = $this->readCommit($result_commit);

    $result = new Commit();
    $result->Branch         = $branch;
    $result->Hash           = $fields['sha'];
    $result->AuthorName     = $fields['author_name'];
    $result->AuthorEmail    = $fields['author_email'];
    $result->CommitterName  = $fields['committer_name'];
    $result->CommitterEmail = $fields['committer_email'];
    $result->Message        = $fields['message'];
    $result->Date           = $fields['date'];
    $result->Parents        = $fields['parents'];

    return $result;
}
/** @inheritDoc */ /** @inheritDoc */
public function getName() { return $this->name; } public function getName() { return $this->name; }

View File

@ -1,3 +1,3 @@
This year I don't have too much free time. SO I won't use a completely new language but one which I'm currently using in my dayjob: golang. This year I don't have too much free time. So I won't use a completely new language but one which I'm currently using in my dayjob: golang.
I'm not the biggest fan of go (I think it's ok, but could really do with more convenience features), but I'm pretty experienced in it and can hopefully work a bit faster through the problems this year. I'm not the biggest fan of go (I think it's ok, but could really do with more convenience features), but I'm pretty experienced in it and can hopefully work a bit faster through the problems this year.