When we test performance with testReadNextBatchRow in PR2816, using the following code: ``` for (int j = 0; j < length; j++) { i++; jobject row = env->GetObjectArrayElement(batch, j); if (i > 1 && i % printNum == 0) { gettimeofday(&read, NULL); double hasNextTime = 1000000 * (startReadNextBatchRow.tv_sec - startHasNext.tv_sec) + startReadNextBatchRow.tv_usec - startHasNext.tv_usec; double readNextBatchTime = 1000000 * (endReadNextBatchRow.tv_sec - startReadNextBatchRow.tv_sec) + endReadNextBatchRow.tv_usec - startReadNextBatchRow.tv_usec; time = 1000000 * (read.tv_sec - startHasNext.tv_sec) + read.tv_usec - startHasNext.tv_usec; printf("%d: time is %lf s, speed is %lf records/s, hasNext time is %lf s,readNextBatchRow time is %lf s ", i, time / 1000000.0, printNum / (time / 1000000.0), hasNextTime / 1000000.0, readNextBatchTime / 1000000.0); gettimeofday(&startHasNext, NULL); printf("%s\t", carbonRow.getString(0)); printf("%s\t", carbonRow.getString(1)); printf("%s\t", carbonRow.getString(2)); printf("%s\t", carbonRow.getString(3)); printf("%ld\t", carbonRow.getLong(4)); printf("%ld\t", carbonRow.getLong(5)); printf("\n"); } env->DeleteLocalRef(row); } } else { i = i + length; } env->DeleteLocalRef(batch); } ``` the read performance is 1.6 million records/s. But if I add ``` carbonRow.setCarbonRow(row); carbonRow.getString(0); carbonRow.getString(1); carbonRow.getString(2); carbonRow.getString(3); carbonRow.getLong(4); carbonRow.getLong(5); ``` for each row, the performance decreases to 0.3 million records/s. 
for (int j = 0; j < length; j++) { i++; jobject row = env->GetObjectArrayElement(batch, j); carbonRow.setCarbonRow(row); carbonRow.getString(0); carbonRow.getString(1); carbonRow.getString(2); carbonRow.getString(3); carbonRow.getLong(4); carbonRow.getLong(5); if (i > 1 && i % printNum == 0) { gettimeofday(&read, NULL); double hasNextTime = 1000000 * (startReadNextBatchRow.tv_sec - startHasNext.tv_sec) + startReadNextBatchRow.tv_usec - startHasNext.tv_usec; double readNextBatchTime = 1000000 * (endReadNextBatchRow.tv_sec - startReadNextBatchRow.tv_sec) + endReadNextBatchRow.tv_usec - startReadNextBatchRow.tv_usec; time = 1000000 * (read.tv_sec - startHasNext.tv_sec) + read.tv_usec - startHasNext.tv_usec; printf("%d: time is %lf s, speed is %lf records/s, hasNext time is %lf s,readNextBatchRow time is %lf s ", i, time / 1000000.0, printNum / (time / 1000000.0), hasNextTime / 1000000.0, readNextBatchTime / 1000000.0); gettimeofday(&startHasNext, NULL); printf("%s\t", carbonRow.getString(0)); printf("%s\t", carbonRow.getString(1)); printf("%s\t", carbonRow.getString(2)); printf("%s\t", carbonRow.getString(3)); printf("%ld\t", carbonRow.getLong(4)); printf("%ld\t", carbonRow.getLong(5)); printf("\n"); } env->DeleteLocalRef(row); } } else { i = i + length; } env->DeleteLocalRef(batch); } ``` When we get string or int data via carbonRow.getString/getInt, which calls JNI, it takes some time. So how can we improve the C++ SDK performance? Any good ideas for it? Do you know a better way to convert an object to a primitive data type like int or float? -- Sent from: http://apache-carbondata-dev-mailing-list-archive.1130556.n5.nabble.com/ |
Hi, does anyone have a good suggestion for this? I want to improve the
performance. -- Sent from: http://apache-carbondata-dev-mailing-list-archive.1130556.n5.nabble.com/ |
Free forum by Nabble | Edit this page |