dataloader.cpp

Go to the documentation of this file.
00001 /*
00002         Description: async stream reader, used to load repository data on startup
00003 
00004         Author: Marco Costalba (C) 2005-2006
00005 
00006         Copyright: See COPYING file that comes with this distribution
00007 
00008 */
00009 #include <stdlib.h> // rand()
00010 #include <time.h> // time()
00011 #include <qcstring.h>
00012 #include <qdir.h>
00013 #include <qprocess.h>
00014 #include "git.h"
00015 #include "dataloader.h"
00016 
00017 #define GUI_UPDATE_INTERVAL 500
00018 #define READ_BLOCK_SIZE     65535
00019 
00020 DataLoader::DataLoader(Git* g, FileHistory* f) : QObject(g), git(g), fh(f) {
00021 
00022         canceling = parsing = false;
00023         isProcExited = true;
00024         halfChunk = NULL;
00025         proc = NULL;
00026         loadedBytes = 0;
00027 
00028         connect(git, SIGNAL(cancelAllProcesses()), this, SLOT(on_cancel()));
00029         connect(&guiUpdateTimer, SIGNAL(timeout()), this, SLOT(on_timeout()));
00030 }
00031 
00032 void DataLoader::on_cancel(const FileHistory* f) {
00033 
00034         if (f == fh)
00035                 on_cancel();
00036 }
00037 
00038 bool DataLoader::start(SCList args, SCRef wd) {
00039 
00040         if (!isProcExited) {
00041                 dbs("ASSERT in DataLoader::start, called while processing");
00042                 return false;
00043         }
00044         isProcExited = false;
00045         if (!doStart(args, wd)) {
00046                 deleteLater();
00047                 return false;
00048         }
00049         loadTime.start();
00050         guiUpdateTimer.start(GUI_UPDATE_INTERVAL, true);
00051         return true;
00052 }
00053 
00054 void DataLoader::on_eof() {
00055 
00056         isProcExited = true;
00057 
00058         if (parsing && guiUpdateTimer.isActive())
00059                 dbs("ASSERT in DataLoader: timer active while parsing");
00060 
00061         if (parsing == guiUpdateTimer.isActive() && !canceling)
00062                 dbs("ASSERT in DataLoader: inconsistent timer");
00063 
00064         if (guiUpdateTimer.isActive()) // no need to wait anymore
00065                 guiUpdateTimer.start(1, true);
00066 }
00067 
00068 void DataLoader::on_timeout() {
00069 
00070         if (canceling) {
00071                 deleteLater();
00072                 return; // we leave with guiUpdateTimer not active
00073         }
00074         parsing = true;
00075 
00076         // process could exit while we are processing so save the flag now
00077         bool lastBuffer = isProcExited;
00078         loadedBytes += readNewData(lastBuffer);
00079         emit newDataReady(fh); // inserting in list view is about 3% of total time
00080 
00081         if (lastBuffer) {
00082                 emit loaded(fh, loadedBytes, loadTime.elapsed(), true, "", "");
00083                 deleteLater();
00084 
00085         } else if (isProcExited) { // exited while parsing
00086                 dbs("Exited while parsing!!!!");
00087                 guiUpdateTimer.start(1, true);
00088         } else
00089                 guiUpdateTimer.start(GUI_UPDATE_INTERVAL, true);
00090 
00091         parsing = false;
00092 }
00093 
00094 void DataLoader::parseSingleBuffer(const QByteArray& ba) {
00095 
00096         if (ba.size() == 0 || canceling)
00097                 return;
00098 
00099         int ofs = 0, newOfs, bz = ba.size();
00100         while (bz - ofs > 0) {
00101 
00102                 if (!halfChunk) {
00103 
00104                         newOfs = git->addChunk(fh, ba, ofs);
00105                         if (newOfs == -1)
00106                                 break; // half chunk detected
00107 
00108                         ofs = newOfs;
00109 
00110                 } else { // less then 1% of cases with READ_BLOCK_SIZE = 64KB
00111 
00112                         int end = ba.find('\0', 0);
00113                         if (end == -1) // consecutives half chunks
00114                                 break;
00115 
00116                         ofs = end + 1;
00117                         QGit::baAppend(&halfChunk, &(ba[0]), ofs);
00118                         fh->rowData.append(halfChunk);
00119                         addSplittedChunks(halfChunk);
00120                         halfChunk = NULL;
00121                 }
00122         }
00123         // save any remaining half chunk
00124         if (bz - ofs > 0)
00125                 QGit::baAppend(&halfChunk, &(ba[ofs]),  bz - ofs);
00126 }
00127 
00128 void DataLoader::addSplittedChunks(const QByteArray* hc) {
00129 
00130         if (hc->at(hc->size() - 1) != 0) {
00131                 dbs("ASSERT in DataLoader, bad half chunk");
00132                 return;
00133         }
00134         // do not assume we have only one chunk in hc
00135         int ofs = 0;
00136         while (ofs != -1 && ofs != (int)hc->size())
00137                 ofs = git->addChunk(fh, *hc, ofs);
00138 }
00139 
00140 // *************** git interface facility dependent code *****************************
00141 
00142 #ifdef USE_QPROCESS
00143 
00144 DataLoader::~DataLoader() {}
00145 
00146 void DataLoader::on_cancel() {
00147 
00148         if (!canceling) { // just once
00149                 canceling = true;
00150                 if (proc)
00151                         proc->tryTerminate();
00152         }
00153 }
00154 
00155 bool DataLoader::doStart(SCList args, SCRef wd) {
00156 
00157         proc = new QProcess(args, this);
00158         proc->setWorkingDirectory(wd);
00159         if (!proc->start())
00160                 return false;
00161 
00162         connect(proc, SIGNAL(processExited()), this, SLOT(on_eof()));
00163         // signal readyReadStdout() is not connected, read is timeout based. Faster.
00164         return true;
00165 }
00166 
00167 void DataLoader::on_procDataReady(const QByteArray&) { /* timeout based */ }
00168 
00169 ulong DataLoader::readNewData(bool) {
00170 
00171         /*
00172            QByteArray copy c'tor uses shallow copy, but there is a deep copy in
00173            QProcess::readStdout(), from an internal buffers list to return value.
00174 
00175            Qt uses a select() to detect new data is ready, copies immediately the
00176            data to the heap with a read() and stores the pointer to new data in a
00177            pointer list, from qprocess_unix.cpp:
00178 
00179                 const int basize = 4096;
00180                 QByteArray *ba = new QByteArray( basize );
00181                 n = ::read( fd, ba->data(), basize );
00182                 buffer->append( ba ); // added to a QPtrList<QByteArray> pointer list
00183 
00184            When we call QProcess::readStdout() data from buffers pointed by the
00185            pointer list is memcpy() to the function return value, from qprocess.cpp:
00186 
00187                 ....
00188                 return buf->readAll(); // memcpy() here
00189         */
00190         QByteArray* ba = new QByteArray(proc->readStdout());
00191         if (ba->size() == 0) {
00192                 delete ba;
00193                 return 0;
00194         }
00195         fh->rowData.append(ba);
00196         parseSingleBuffer(*ba);
00197         return ba->size();
00198 }
00199 
00200 #else // temporary file as data exchange facility
00201 
00202 DataLoader::~DataLoader() {
00203 
00204         if (dataFile.isOpen())
00205                 dataFile.close();
00206 
00207         QDir dir;
00208         dir.remove(dataFileName);
00209         dir.remove(scriptFileName);
00210 }
00211 
00212 void DataLoader::on_cancel() {
00213 
00214         if (!canceling) { // just once
00215                 canceling = true;
00216                 git->cancelProcess(proc);
00217                 if (!procPID.isEmpty()) {
00218                         git->errorReportingEnabled = false; // hide error when already terminated
00219                         git->run("kill " + procPID.stripWhiteSpace());
00220                         git->errorReportingEnabled = true;
00221                 }
00222         }
00223 }
00224 
00225 bool DataLoader::doStart(SCList args, SCRef wd) {
00226 
00227         // create a script to redirect 'git rev-list' stdout to dataFile
00228         const QString tmpfsDir("/tmp");
00229         QDir dir(tmpfsDir); // use a tmpfs mounted filesystem if available
00230         bool useTmpfsDir = (dir.exists() && dir.isReadable());
00231         bool noexec;
00232         srand (time(NULL));
00233         do {
00234                 do {
00235                         // ensure unique names for our DataLoader instance file
00236                         QString t = QString::number(rand(), 16);
00237                         dataFileName = "/qgit_" + t + ".txt";
00238                         scriptFileName = "/qgit_" + t + ".sh";
00239                         dataFileName.prepend(useTmpfsDir ? tmpfsDir : wd);
00240                         scriptFileName.prepend(useTmpfsDir ? tmpfsDir : wd);
00241                 } while (dir.exists(dataFileName) || dir.exists(scriptFileName));
00242 
00243                 /* in case we use '/tmp' be sure is writable and executable */
00244                 noexec = ( useTmpfsDir &&
00245                           !QGit::writeToFile(scriptFileName, "test exec bit", true));
00246                 if (noexec)
00247                         useTmpfsDir = false;
00248         } while (noexec);
00249 
00250         dataFile.setName(dataFileName);
00251         QString runCmd;
00252         FOREACH_SL (it, args)
00253                 if ((*it).contains(' '))
00254                         runCmd.append("\"" + *it + "\" ");
00255                 else
00256                         runCmd.append(*it + " ");
00257 
00258         runCmd.append("> " +  dataFileName);
00259         runCmd.append(" &\necho $!\nwait"); // we want to read git-rev-list PID
00260         runCmd.prepend("cd " + wd + "\n");
00261         if (!QGit::writeToFile(scriptFileName, runCmd, true))
00262                 return false;
00263 
00264         proc = git->runAsync(scriptFileName, this, "");
00265         return (proc != NULL);
00266 }
00267 
00268 void DataLoader::on_procDataReady(const QByteArray& data) {
00269 // the script sends pid of launched git-rev-list, to be used for canceling
00270 
00271         procPID.append(data);
00272 }
00273 
00274 ulong DataLoader::readNewData(bool lastBuffer) {
00275 
00276         bool ok =     dataFile.isOpen()
00277                   || (dataFile.exists() && dataFile.open(IO_Raw | IO_ReadOnly));
00278         if (!ok)
00279                 return 0;
00280 
00281         ulong cnt = 0;
00282         while (!dataFile.atEnd()) {
00283 
00284                 QByteArray* ba = new QByteArray(READ_BLOCK_SIZE);
00285 
00286                 // this is the ONLY deep copy involved in the whole loading
00287                 // QFile::readBlock() calls standard C read() function when
00288                 // file is open with IO_Raw flag, or fread() otherwise
00289                 uint len = dataFile.readBlock(ba->data(), READ_BLOCK_SIZE);
00290                 if (len <= 0) {
00291                         delete ba;
00292                         break;
00293 
00294                 } else if (len < READ_BLOCK_SIZE) // unlikely
00295                         ba->resize(len, QGArray::SpeedOptim);
00296 
00297                 cnt += len;
00298                 fh->rowData.append(ba);
00299                 parseSingleBuffer(*ba);
00300 
00301                 // avoid reading small chunks if data producer is still running
00302                 if (len < READ_BLOCK_SIZE && !lastBuffer)
00303                         break;
00304         }
00305         return cnt;
00306 }
00307 
00308 #endif // USE_QPROCESS

Generated on Fri Dec 7 21:57:37 2007 for QGit by  doxygen 1.5.3