You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

contents.cc 11 KiB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409
  1. // -*- mode: cpp; mode: fold -*-
  2. // Description /*{{{*/
  3. /* ######################################################################
  4. contents - Archive contents generator
  5. The GenContents class is a back end for an archive contents generator.
  It takes a list of per-deb file names and merges it into an in-memory
  7. database of all previous output. This database is stored as a set
  8. of binary trees linked across directories to form a tree of all files+dirs
  9. given to it. The tree will also be sorted as it is built up thus
  10. removing the massive sort time overhead.
  11. By breaking all the pathnames into components and storing them
  12. separately a space saving is realized by not duplicating the string
  13. over and over again. Ultimately this saving is sacrificed to storage of
  14. the tree structure itself but the tree structure yields a speed gain
  15. in the sorting and processing. Ultimately it takes about 5 seconds to
  16. do 141000 nodes and about 5 meg of ram.
  17. The tree looks something like:
       usr/
       /  \              / libslang
   bin/    lib/ --> libc6
          /   \          \ libfoo
    games/     sbin/
  23. The ---> is the DirDown link
  24. ##################################################################### */
  25. /*}}}*/
  26. // Include Files /*{{{*/
  #include <config.h>

  #include <apt-pkg/debfile.h>
  #include <apt-pkg/dirstream.h>
  #include <apt-pkg/error.h>
  #include <apt-pkg/fileutl.h>

  #include <new>
  #include <string>

  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>

  #include "contents.h"

  #include <apti18n.h>
  37. /*}}}*/
  38. // GenContents::~GenContents - Free allocated memory /*{{{*/
  39. // ---------------------------------------------------------------------
  40. /* Since all our allocations are static big-block allocations all that is
  41. needed is to free all of them. */
  42. GenContents::~GenContents()
  43. {
  44. while (BlockList != 0)
  45. {
  46. BigBlock *Old = BlockList;
  47. BlockList = Old->Next;
  48. free(Old->Block);
  49. delete Old;
  50. }
  51. }
  52. /*}}}*/
  53. // GenContents::Mystrdup - Custom strdup /*{{{*/
  54. // ---------------------------------------------------------------------
  55. /* This strdup also uses a large block allocator to eliminate glibc
  56. overhead */
  57. char *GenContents::Mystrdup(const char *From)
  58. {
  59. unsigned int Len = strlen(From) + 1;
  60. if (StrLeft <= Len)
  61. {
  62. StrLeft = 4096*10;
  63. StrPool = (char *)malloc(StrLeft);
  64. BigBlock *Block = new BigBlock;
  65. Block->Block = StrPool;
  66. Block->Next = BlockList;
  67. BlockList = Block;
  68. }
  69. memcpy(StrPool,From,Len);
  70. StrLeft -= Len;
  71. char *Res = StrPool;
  72. StrPool += Len;
  73. return Res;
  74. }
  75. /*}}}*/
  76. // GenContents::Node::operator new - Big block allocator /*{{{*/
  77. // ---------------------------------------------------------------------
  78. /* This eliminates glibc's malloc overhead by allocating large blocks and
  79. having a continuous set of Nodes. This takes about 8 bytes off each nodes
  80. space needs. Freeing is not supported. */
  81. void *GenContents::Node::operator new(size_t Amount,GenContents *Owner)
  82. {
  83. if (Owner->NodeLeft == 0)
  84. {
  85. Owner->NodeLeft = 10000;
  86. Owner->NodePool = static_cast<Node *>(malloc(Amount*Owner->NodeLeft));
  87. BigBlock *Block = new BigBlock;
  88. Block->Block = Owner->NodePool;
  89. Block->Next = Owner->BlockList;
  90. Owner->BlockList = Block;
  91. }
  92. Owner->NodeLeft--;
  93. return Owner->NodePool++;
  94. }
  95. /*}}}*/
  96. // GenContents::Grab - Grab a new node representing Name under Top /*{{{*/
  97. // ---------------------------------------------------------------------
  98. /* This grabs a new node representing the pathname component Name under
  99. the node Top. The node is given the name Package. It is assumed that Name
  100. is inside of top. If a duplicate already entered name is found then
  101. a note is made on the Dup list and the previous in-tree node is returned. */
  102. GenContents::Node *GenContents::Grab(GenContents::Node *Top,const char *Name,
  103. const char *Package)
  104. {
  105. /* We drop down to the next dir level each call. This simplifies
  106. the calling routine */
  107. if (Top->DirDown == 0)
  108. {
  109. Node *Item = new(this) Node;
  110. Item->Path = Mystrdup(Name);
  111. Item->Package = Package;
  112. Top->DirDown = Item;
  113. return Item;
  114. }
  115. Top = Top->DirDown;
  116. int Res;
  117. while (1)
  118. {
  119. Res = strcmp(Name,Top->Path);
  120. // Collision!
  121. if (Res == 0)
  122. {
  123. // See if this is the same package (multi-version dup)
  124. if (Top->Package == Package ||
  125. strcasecmp(Top->Package,Package) == 0)
  126. return Top;
  127. // Look for an already existing Dup
  128. for (Node *I = Top->Dups; I != 0; I = I->Dups)
  129. if (I->Package == Package ||
  130. strcasecmp(I->Package,Package) == 0)
  131. return Top;
  132. // Add the dup in
  133. Node *Item = new(this) Node;
  134. Item->Path = Top->Path;
  135. Item->Package = Package;
  136. Item->Dups = Top->Dups;
  137. Top->Dups = Item;
  138. return Top;
  139. }
  140. // Continue to traverse the tree
  141. if (Res < 0)
  142. {
  143. if (Top->BTreeLeft == 0)
  144. break;
  145. Top = Top->BTreeLeft;
  146. }
  147. else
  148. {
  149. if (Top->BTreeRight == 0)
  150. break;
  151. Top = Top->BTreeRight;
  152. }
  153. }
  154. // The item was not found in the tree
  155. Node *Item = new(this) Node;
  156. Item->Path = Mystrdup(Name);
  157. Item->Package = Package;
  158. // Link it into the tree
  159. if (Res < 0)
  160. {
  161. Item->BTreeLeft = Top->BTreeLeft;
  162. Top->BTreeLeft = Item;
  163. }
  164. else
  165. {
  166. Item->BTreeRight = Top->BTreeRight;
  167. Top->BTreeRight = Item;
  168. }
  169. return Item;
  170. }
  171. /*}}}*/
  172. // GenContents::Add - Add a path to the tree /*{{{*/
  173. // ---------------------------------------------------------------------
  174. /* This takes a full pathname and adds it into the tree. We split the
  175. pathname into directory fragments adding each one as we go. Technically
  176. in output from tar this should result in hitting previous items. */
  177. void GenContents::Add(const char *Dir,const char *Package)
  178. {
  179. Node *Root = &this->Root;
  180. // Drop leading slashes
  181. while (*Dir == '/' && *Dir != 0)
  182. Dir++;
  183. // Run over the string and grab out each bit up to and including a /
  184. const char *Start = Dir;
  185. const char *I = Dir;
  186. while (*I != 0)
  187. {
  188. if (*I != '/' || I - Start <= 1)
  189. {
  190. I++;
  191. continue;
  192. }
  193. I++;
  194. // Copy the path fragment over
  195. char Tmp[1024];
  196. strncpy(Tmp,Start,I - Start);
  197. Tmp[I - Start] = 0;
  198. // Grab a node for it
  199. Root = Grab(Root,Tmp,Package);
  200. Start = I;
  201. }
  202. // The final component if it does not have a trailing /
  203. if (I - Start >= 1)
  204. Grab(Root,Start,Package);
  205. }
  206. /*}}}*/
  207. // GenContents::WriteSpace - Write a given number of white space chars /*{{{*/
  208. // ---------------------------------------------------------------------
  209. /* We mod 8 it and write tabs where possible. */
  210. void GenContents::WriteSpace(std::string &out, size_t Current, size_t Target)
  211. {
  212. if (Target <= Current)
  213. Target = Current + 1;
  214. /* Now we write tabs so long as the next tab stop would not pass
  215. the target */
  216. for (; (Current/8 + 1)*8 < Target; Current = (Current/8 + 1)*8)
  217. out.append("\t");
  218. // Fill the last bit with spaces
  219. for (; Current < Target; Current++)
  220. out.append(" ");
  221. }
  222. /*}}}*/
  223. // GenContents::Print - Display the tree /*{{{*/
  224. // ---------------------------------------------------------------------
  225. /* This is the final result function. It takes the tree and recursively
  226. calls itself and runs over each section of the tree printing out
  227. the pathname and the hit packages. We use Buf to build the pathname
  228. summed over all the directory parents of this node. */
  229. void GenContents::Print(FileFd &Out)
  230. {
  231. char Buffer[1024];
  232. Buffer[0] = 0;
  233. DoPrint(Out,&Root,Buffer);
  234. }
  235. void GenContents::DoPrint(FileFd &Out,GenContents::Node *Top, char *Buf)
  236. {
  237. if (Top == 0)
  238. return;
  239. // Go left
  240. DoPrint(Out,Top->BTreeLeft,Buf);
  241. // Print the current dir location and then descend to lower dirs
  242. char *OldEnd = Buf + strlen(Buf);
  243. if (Top->Path != 0)
  244. {
  245. strcat(Buf,Top->Path);
  246. // Do not show the item if it is a directory with dups
  247. if (Top->Path[strlen(Top->Path)-1] != '/' /*|| Top->Dups == 0*/)
  248. {
  249. std::string out = Buf;
  250. WriteSpace(out, out.length(), 60);
  251. for (Node *I = Top; I != 0; I = I->Dups)
  252. {
  253. if (I != Top)
  254. out.append(",");
  255. out.append(I->Package);
  256. }
  257. out.append("\n");
  258. Out.Write(out.c_str(), out.length());
  259. }
  260. }
  261. // Go along the directory link
  262. DoPrint(Out,Top->DirDown,Buf);
  263. *OldEnd = 0;
  264. // Go right
  265. DoPrint(Out,Top->BTreeRight,Buf);
  266. }
  267. /*}}}*/
  268. // ContentsExtract Constructor /*{{{*/
  269. ContentsExtract::ContentsExtract()
  270. : Data(0), MaxSize(0), CurSize(0)
  271. {
  272. }
  273. /*}}}*/
  274. // ContentsExtract Destructor /*{{{*/
  275. ContentsExtract::~ContentsExtract()
  276. {
  277. free(Data);
  278. }
  279. /*}}}*/
  280. // ContentsExtract::Read - Read the archive /*{{{*/
  281. // ---------------------------------------------------------------------
  282. /* */
  283. bool ContentsExtract::Read(debDebFile &Deb)
  284. {
  285. Reset();
  286. return Deb.ExtractArchive(*this);
  287. }
  288. /*}}}*/
  289. // ContentsExtract::DoItem - Extract an item /*{{{*/
  290. // ---------------------------------------------------------------------
  291. /* This just tacks the name onto the end of our memory buffer */
  292. bool ContentsExtract::DoItem(Item &Itm, int &/*Fd*/)
  293. {
  294. unsigned long Len = strlen(Itm.Name);
  295. // Strip leading ./'s
  296. if (Itm.Name[0] == '.' && Itm.Name[1] == '/')
  297. {
  298. // == './'
  299. if (Len == 2)
  300. return true;
  301. Len -= 2;
  302. Itm.Name += 2;
  303. }
  304. // Allocate more storage for the string list
  305. if (CurSize + Len + 2 >= MaxSize || Data == 0)
  306. {
  307. if (MaxSize == 0)
  308. MaxSize = 512*1024/2;
  309. char *NewData = (char *)realloc(Data,MaxSize*2);
  310. if (NewData == 0)
  311. return _error->Error(_("realloc - Failed to allocate memory"));
  312. Data = NewData;
  313. MaxSize *= 2;
  314. }
  315. strcpy(Data+CurSize,Itm.Name);
  316. CurSize += Len + 1;
  317. return true;
  318. }
  319. /*}}}*/
  320. // ContentsExtract::TakeContents - Load the contents data /*{{{*/
  321. // ---------------------------------------------------------------------
  322. /* */
  323. bool ContentsExtract::TakeContents(const void *NewData,unsigned long long Length)
  324. {
  325. if (Length == 0)
  326. {
  327. CurSize = 0;
  328. return true;
  329. }
  330. // Allocate more storage for the string list
  331. if (Length + 2 >= MaxSize || Data == 0)
  332. {
  333. if (MaxSize == 0)
  334. MaxSize = 512*1024/2;
  335. while (MaxSize*2 <= Length)
  336. MaxSize *= 2;
  337. char *NewData = (char *)realloc(Data,MaxSize*2);
  338. if (NewData == 0)
  339. return _error->Error(_("realloc - Failed to allocate memory"));
  340. Data = NewData;
  341. MaxSize *= 2;
  342. }
  343. memcpy(Data,NewData,Length);
  344. CurSize = Length;
  345. return Data[CurSize-1] == 0;
  346. }
  347. /*}}}*/
  348. // ContentsExtract::Add - Read the contents data into the sorter /*{{{*/
  349. // ---------------------------------------------------------------------
  350. /* */
  351. void ContentsExtract::Add(GenContents &Contents,std::string const &Package)
  352. {
  353. const char *Start = Data;
  354. char *Pkg = Contents.Mystrdup(Package.c_str());
  355. for (const char *I = Data; I < Data + CurSize; I++)
  356. {
  357. if (*I == 0)
  358. {
  359. Contents.Add(Start,Pkg);
  360. Start = ++I;
  361. }
  362. }
  363. }
  364. /*}}}*/