Browse Source

drop stored StringItems in favor of in-memory mappings

Strings like Section names or architectures are needed vary often.
Instead of writing them each time we need them, we deploy sharing for
these special strings. Until now, this was done with a linked list of
strings in which we would search, which was stored in the cache.
It turns out we can do this just as well in memory as well with a bunch
of std::map's.

In memory means here that it isn't available anymore if we have a partly
invalid cache, but that isn't much of a problem in practice as the
status file is compared to the other files we parse very small and includes
mostly duplicates, so the space we would gain by storing is more or less
equal to the size of the stored linked list…
debian/1.8.y
David Kalnischkies 9 years ago
parent
commit
78a5476f31
  1. 2
      apt-pkg/deb/debindexfile.cc
  2. 45
      apt-pkg/deb/deblistparser.cc
  3. 1
      apt-pkg/deb/deblistparser.h
  4. 2
      apt-pkg/edsp/edspindexfile.cc
  5. 2
      apt-pkg/pkgcache.cc
  6. 30
      apt-pkg/pkgcache.h
  7. 81
      apt-pkg/pkgcachegen.cc
  8. 18
      apt-pkg/pkgcachegen.h

2
apt-pkg/deb/debindexfile.cc

@ -619,7 +619,7 @@ bool debStatusIndex::Merge(pkgCacheGenerator &Gen,OpProgress *Prog) const
pkgCache::PkgFileIterator CFile = Gen.GetCurFile();
CFile->Size = Pkg.FileSize();
CFile->mtime = Pkg.ModificationTime();
map_stringitem_t const storage = Gen.WriteUniqString("now");
map_stringitem_t const storage = Gen.StoreString(pkgCacheGenerator::MIXED, "now");
CFile->Archive = storage;
if (Gen.MergeList(Parser) == false)

45
apt-pkg/deb/deblistparser.cc

@ -58,18 +58,6 @@ debListParser::debListParser(FileFd *File, string const &Arch) : Tags(File),
MultiArchEnabled = Architectures.size() > 1;
}
/*}}}*/
// ListParser::UniqFindTagWrite - Find the tag and write a unq string /*{{{*/
// ---------------------------------------------------------------------
/* */
map_stringitem_t debListParser::UniqFindTagWrite(const char *Tag)
{
const char *Start;
const char *Stop;
if (Section.Find(Tag,Start,Stop) == false)
return 0;
return WriteUniqString(Start,Stop - Start);
}
/*}}}*/
// ListParser::Package - Return the package name /*{{{*/
// ---------------------------------------------------------------------
/* This is to return the name of the package this section describes */
@ -144,9 +132,16 @@ unsigned char debListParser::ParseMultiArch(bool const showErrors) /*{{{*/
/* */
bool debListParser::NewVersion(pkgCache::VerIterator &Ver)
{
const char *Start;
const char *Stop;
// Parse the section
unsigned long const idxSection = UniqFindTagWrite("Section");
Ver->Section = idxSection;
if (Section.Find("Section",Start,Stop) == true)
{
map_stringitem_t const idx = StoreString(pkgCacheGenerator::SECTION, Start, Stop - Start);
Ver->Section = idx;
}
Ver->MultiArch = ParseMultiArch(true);
// Archive Size
Ver->Size = Section.FindULL("Size");
@ -155,10 +150,8 @@ bool debListParser::NewVersion(pkgCache::VerIterator &Ver)
Ver->InstalledSize *= 1024;
// Priority
const char *Start;
const char *Stop;
if (Section.Find("Priority",Start,Stop) == true)
{
{
if (GrabWord(string(Start,Stop-Start),PrioList,Ver->Priority) == false)
Ver->Priority = pkgCache::State::Extra;
}
@ -891,7 +884,7 @@ bool debListParser::LoadReleaseInfo(pkgCache::PkgFileIterator &FileI,
{
// apt-secure does no longer download individual (per-section) Release
// file. to provide Component pinning we use the section name now
map_stringitem_t const storage = WriteUniqString(component);
map_stringitem_t const storage = StoreString(pkgCacheGenerator::MIXED, component);
FileI->Component = storage;
pkgTagFile TagFile(&File, File.Size());
@ -900,19 +893,19 @@ bool debListParser::LoadReleaseInfo(pkgCache::PkgFileIterator &FileI,
return false;
std::string data;
#define APT_INRELEASE(TAG, STORE) \
#define APT_INRELEASE(TYPE, TAG, STORE) \
data = Section.FindS(TAG); \
if (data.empty() == false) \
{ \
map_stringitem_t const storage = WriteUniqString(data); \
map_stringitem_t const storage = StoreString(pkgCacheGenerator::TYPE, data); \
STORE = storage; \
}
APT_INRELEASE("Suite", FileI->Archive)
APT_INRELEASE("Component", FileI->Component)
APT_INRELEASE("Version", FileI->Version)
APT_INRELEASE("Origin", FileI->Origin)
APT_INRELEASE("Codename", FileI->Codename)
APT_INRELEASE("Label", FileI->Label)
APT_INRELEASE(MIXED, "Suite", FileI->Archive)
APT_INRELEASE(MIXED, "Component", FileI->Component)
APT_INRELEASE(VERSION, "Version", FileI->Version)
APT_INRELEASE(MIXED, "Origin", FileI->Origin)
APT_INRELEASE(MIXED, "Codename", FileI->Codename)
APT_INRELEASE(MIXED, "Label", FileI->Label)
#undef APT_INRELEASE
Section.FindFlag("NotAutomatic", FileI->Flags, pkgCache::Flag::NotAutomatic);
Section.FindFlag("ButAutomaticUpgrades", FileI->Flags, pkgCache::Flag::ButAutomaticUpgrades);

1
apt-pkg/deb/deblistparser.h

@ -49,7 +49,6 @@ class debListParser : public pkgCacheGenerator::ListParser
std::vector<std::string> Architectures;
bool MultiArchEnabled;
map_stringitem_t UniqFindTagWrite(const char *Tag);
virtual bool ParseStatus(pkgCache::PkgIterator &Pkg,pkgCache::VerIterator &Ver);
bool ParseDepends(pkgCache::VerIterator &Ver,const char *Tag,
unsigned int Type);

2
apt-pkg/edsp/edspindexfile.cc

@ -56,7 +56,7 @@ bool edspIndex::Merge(pkgCacheGenerator &Gen,OpProgress *Prog) const
pkgCache::PkgFileIterator CFile = Gen.GetCurFile();
CFile->Size = Pkg.FileSize();
CFile->mtime = Pkg.ModificationTime();
map_stringitem_t const storage = Gen.WriteUniqString("edsp::scenario");
map_stringitem_t const storage = Gen.StoreString(pkgCacheGenerator::MIXED, "edsp::scenario");
CFile->Archive = storage;
if (Gen.MergeList(Parser) == false)

2
apt-pkg/pkgcache.cc

@ -82,7 +82,6 @@ pkgCache::Header::Header()
MaxDescFileSize = 0;
FileList = 0;
StringList = 0;
VerSysName = 0;
Architecture = 0;
HashTableSize = _config->FindI("APT::Cache-HashTableSize", 10 * 1048);
@ -140,7 +139,6 @@ bool pkgCache::ReMap(bool const &Errorchecks)
DescP = (Description *)Map.Data();
ProvideP = (Provides *)Map.Data();
DepP = (Dependency *)Map.Data();
StringItemP = (StringItem *)Map.Data();
StrP = (char *)Map.Data();
if (Errorchecks == false)

30
apt-pkg/pkgcache.h

@ -109,7 +109,6 @@ class pkgCache /*{{{*/
struct Description;
struct Provides;
struct Dependency;
struct StringItem;
struct VerFile;
struct DescFile;
@ -186,7 +185,6 @@ class pkgCache /*{{{*/
Description *DescP;
Provides *ProvideP;
Dependency *DepP;
StringItem *StringItemP;
char *StrP;
virtual bool ReMap(bool const &Errorchecks = true);
@ -290,12 +288,6 @@ struct pkgCache::Header
The PackageFile structures are singly linked lists that represent
all package files that have been merged into the cache. */
map_pointer_t FileList;
/** \brief index of the first StringItem structure
The cache contains a list of all the unique strings (StringItems).
The parser reads this list into memory so it can match strings
against it.*/
map_pointer_t StringList;
/** \brief String representing the version system used */
map_pointer_t VerSysName;
/** \brief native architecture the cache was built against */
@ -438,7 +430,7 @@ struct pkgCache::Package
struct pkgCache::PackageFile
{
/** \brief physical disk file that this PackageFile represents */
map_pointer_t FileName; // StringItem
map_stringitem_t FileName;
/** \brief the release information
Please see the files document for a description of what the
@ -606,7 +598,7 @@ struct pkgCache::Description
struct pkgCache::Dependency
{
/** \brief string of the version the dependency is applied against */
map_stringitem_t Version; // StringItem
map_stringitem_t Version;
/** \brief index of the package this depends applies to
The generator will - if the package does not already exist -
@ -656,24 +648,6 @@ struct pkgCache::Provides
map_pointer_t NextPkgProv; // Provides
};
/*}}}*/
// StringItem structure /*{{{*/
/** \brief used for generating single instances of strings
Some things like Section Name are are useful to have as unique tags.
It is part of a linked list based at pkgCache::Header::StringList
All strings are simply inlined any place in the file that is natural
for the writer. The client should make no assumptions about the positioning
of strings. All StringItems should be null-terminated. */
struct pkgCache::StringItem
{
/** \brief string this refers to */
map_stringitem_t String;
/** \brief Next link in the chain */
map_stringitem_t NextItem;
};
/*}}}*/
inline char const * pkgCache::NativeArch()
{ return StrP + HeaderP->Architecture; }

81
apt-pkg/pkgcachegen.cc

@ -57,8 +57,7 @@ pkgCacheGenerator::pkgCacheGenerator(DynamicMMap *pMap,OpProgress *Prog) :
FoundFileDeps(0)
{
CurrentFile = 0;
memset(UniqHash,0,sizeof(UniqHash));
if (_error->PendingError() == true)
return;
@ -82,9 +81,7 @@ pkgCacheGenerator::pkgCacheGenerator(DynamicMMap *pMap,OpProgress *Prog) :
if (unlikely(idxVerSysName == 0))
return;
Cache.HeaderP->VerSysName = idxVerSysName;
// this pointer is set in ReMap, but we need it now for WriteUniqString
Cache.StringItemP = (pkgCache::StringItem *)Map.Data();
map_stringitem_t const idxArchitecture = WriteUniqString(_config->Find("APT::Architecture"));
map_stringitem_t const idxArchitecture = StoreString(MIXED, _config->Find("APT::Architecture"));
if (unlikely(idxArchitecture == 0))
return;
Cache.HeaderP->Architecture = idxArchitecture;
@ -149,10 +146,6 @@ void pkgCacheGenerator::ReMap(void const * const oldMap, void const * const newM
CurrentFile += (pkgCache::PackageFile const * const) newMap - (pkgCache::PackageFile const * const) oldMap;
for (size_t i = 0; i < _count(UniqHash); ++i)
if (UniqHash[i] != 0)
UniqHash[i] += (pkgCache::StringItem const * const) newMap - (pkgCache::StringItem const * const) oldMap;
for (std::vector<pkgCache::GrpIterator*>::const_iterator i = Dynamic<pkgCache::GrpIterator>::toReMap.begin();
i != Dynamic<pkgCache::GrpIterator>::toReMap.end(); ++i)
(*i)->ReMap(oldMap, newMap);
@ -685,7 +678,7 @@ bool pkgCacheGenerator::NewPackage(pkgCache::PkgIterator &Pkg,const string &Name
#endif
Pkg->Group = Grp.Index();
// all is mapped to the native architecture
map_stringitem_t const idxArch = (Arch == "all") ? Cache.HeaderP->Architecture : WriteUniqString(Arch.c_str());
map_stringitem_t const idxArch = (Arch == "all") ? Cache.HeaderP->Architecture : StoreString(MIXED, Arch);
if (unlikely(idxArch == 0))
return false;
Pkg->Arch = idxArch;
@ -900,7 +893,7 @@ map_pointer_t pkgCacheGenerator::NewDescription(pkgCache::DescIterator &Desc,
// Fill it in
Desc = pkgCache::DescIterator(Cache,Cache.DescP + Description);
Desc->ID = Cache.HeaderP->DescriptionCount++;
map_stringitem_t const idxlanguage_code = WriteUniqString(Lang);
map_stringitem_t const idxlanguage_code = StoreString(MIXED, Lang);
if (unlikely(idxlanguage_code == 0))
return 0;
Desc->language_code = idxlanguage_code;
@ -1102,7 +1095,7 @@ bool pkgCacheGenerator::SelectFile(const string &File,const string &Site,
// Fill it in
map_stringitem_t const idxFileName = WriteStringInMap(File);
map_stringitem_t const idxSite = WriteUniqString(Site);
map_stringitem_t const idxSite = StoreString(MIXED, Site);
if (unlikely(idxFileName == 0 || idxSite == 0))
return false;
CurrentFile->FileName = idxFileName;
@ -1110,7 +1103,7 @@ bool pkgCacheGenerator::SelectFile(const string &File,const string &Site,
CurrentFile->NextFile = Cache.HeaderP->FileList;
CurrentFile->Flags = Flags;
CurrentFile->ID = Cache.HeaderP->PackageFileCount;
map_stringitem_t const idxIndexType = WriteUniqString(Index.GetType()->Label);
map_stringitem_t const idxIndexType = StoreString(MIXED, Index.GetType()->Label);
if (unlikely(idxIndexType == 0))
return false;
CurrentFile->IndexType = idxIndexType;
@ -1127,57 +1120,27 @@ bool pkgCacheGenerator::SelectFile(const string &File,const string &Site,
// ---------------------------------------------------------------------
/* This is used to create handles to strings. Given the same text it
always returns the same number */
map_stringitem_t pkgCacheGenerator::WriteUniqString(const char *S,
map_stringitem_t pkgCacheGenerator::StoreString(enum StringType const type, const char *S,
unsigned int Size)
{
/* We use a very small transient hash table here, this speeds up generation
by a fair amount on slower machines */
pkgCache::StringItem *&Bucket = UniqHash[(S[0]*5 + S[1]) % _count(UniqHash)];
if (Bucket != 0 &&
stringcmp(S,S+Size,Cache.StrP + Bucket->String) == 0)
return Bucket->String;
// Search for an insertion point
pkgCache::StringItem *I = Cache.StringItemP + Cache.HeaderP->StringList;
int Res = 1;
map_stringitem_t *Last = &Cache.HeaderP->StringList;
for (; I != Cache.StringItemP; Last = &I->NextItem,
I = Cache.StringItemP + I->NextItem)
{
Res = stringcmp(S,S+Size,Cache.StrP + I->String);
if (Res >= 0)
break;
std::string const key(S, Size);
std::map<std::string,map_stringitem_t> * strings;
switch(type) {
case MIXED: strings = &strMixed; break;
case PKGNAME: strings = &strPkgNames; break;
case VERSION: strings = &strVersions; break;
case SECTION: strings = &strSections; break;
default: _error->Fatal("Unknown enum type used for string storage of '%s'", key.c_str()); return 0;
}
// Match
if (Res == 0)
{
Bucket = I;
return I->String;
}
// Get a structure
void const * const oldMap = Map.Data();
map_pointer_t const Item = AllocateInMap(sizeof(pkgCache::StringItem));
if (Item == 0)
return 0;
map_stringitem_t const idxString = WriteStringInMap(S,Size);
if (unlikely(idxString == 0))
return 0;
if (oldMap != Map.Data()) {
Last += (map_pointer_t const * const) Map.Data() - (map_pointer_t const * const) oldMap;
I += (pkgCache::StringItem const * const) Map.Data() - (pkgCache::StringItem const * const) oldMap;
}
*Last = Item;
// Fill in the structure
pkgCache::StringItem *ItemP = Cache.StringItemP + Item;
ItemP->NextItem = I - Cache.StringItemP;
ItemP->String = idxString;
std::map<std::string,map_stringitem_t>::const_iterator const item = strings->find(key);
if (item != strings->end())
return item->second;
Bucket = ItemP;
return ItemP->String;
map_stringitem_t const idxString = WriteStringInMap(S,Size);
strings->insert(std::make_pair(key, idxString));
return idxString;
}
/*}}}*/
// CheckValidity - Check that a cache is up-to-date /*{{{*/

18
apt-pkg/pkgcachegen.h

@ -27,6 +27,7 @@
#include <vector>
#include <string>
#include <map>
class FileFd;
class pkgSourceList;
@ -36,13 +37,16 @@ class pkgIndexFile;
class pkgCacheGenerator /*{{{*/
{
private:
pkgCache::StringItem *UniqHash[26];
APT_HIDDEN map_stringitem_t WriteStringInMap(std::string const &String) { return WriteStringInMap(String.c_str()); };
APT_HIDDEN map_stringitem_t WriteStringInMap(const char *String);
APT_HIDDEN map_stringitem_t WriteStringInMap(const char *String, const unsigned long &Len);
APT_HIDDEN map_pointer_t AllocateInMap(const unsigned long &size);
std::map<std::string,map_stringitem_t> strMixed;
std::map<std::string,map_stringitem_t> strSections;
std::map<std::string,map_stringitem_t> strPkgNames;
std::map<std::string,map_stringitem_t> strVersions;
public:
class ListParser;
@ -91,8 +95,9 @@ class pkgCacheGenerator /*{{{*/
public:
map_stringitem_t WriteUniqString(const char *S,unsigned int const Size);
inline map_stringitem_t WriteUniqString(const std::string &S) {return WriteUniqString(S.c_str(),S.length());};
enum StringType { MIXED, PKGNAME, VERSION, SECTION };
map_stringitem_t StoreString(enum StringType const type, const char * S, unsigned int const Size);
inline map_stringitem_t StoreString(enum StringType const type, const std::string &S) {return StoreString(type, S.c_str(),S.length());};
void DropProgress() {Progress = 0;};
bool SelectFile(const std::string &File,const std::string &Site,pkgIndexFile const &Index,
@ -145,8 +150,9 @@ class pkgCacheGenerator::ListParser
protected:
inline map_stringitem_t WriteUniqString(std::string S) {return Owner->WriteUniqString(S);};
inline map_stringitem_t WriteUniqString(const char *S,unsigned int Size) {return Owner->WriteUniqString(S,Size);};
inline map_stringitem_t StoreString(pkgCacheGenerator::StringType const type, std::string const &S) {return Owner->StoreString(type, S);};
inline map_stringitem_t StoreString(pkgCacheGenerator::StringType const type, const char *S,unsigned int Size) {return Owner->StoreString(type, S, Size);};
inline map_stringitem_t WriteString(const std::string &S) {return Owner->WriteStringInMap(S);};
inline map_stringitem_t WriteString(const char *S,unsigned int Size) {return Owner->WriteStringInMap(S,Size);};
bool NewDepends(pkgCache::VerIterator &Ver,const std::string &Package, const std::string &Arch,

Loading…
Cancel
Save