Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/1584 Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/2181/ --- |
In reply to this post by qiuchenjian-2
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1584#discussion_r158452228 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/filesystem/S3CarbonFile.java --- @@ -0,0 +1,445 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.core.datastore.filesystem; + +import java.io.BufferedInputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.List; + +import org.apache.carbondata.common.logging.LogService; +import org.apache.carbondata.common.logging.LogServiceFactory; +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datastore.impl.CarbonS3FileSystem; +import org.apache.carbondata.core.datastore.impl.FileFactory; +import org.apache.carbondata.core.util.CarbonUtil; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.io.compress.BZip2Codec; +import org.apache.hadoop.io.compress.CompressionCodec; +import org.apache.hadoop.io.compress.CompressionCodecFactory; +import org.apache.hadoop.io.compress.GzipCodec; + +public class S3CarbonFile implements CarbonFile { --- End diff -- Why we need this file, can't we use HDFSCarbonFile directly? --- |
In reply to this post by qiuchenjian-2
Github user jackylk commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1584#discussion_r158452527 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/filesystem/S3CarbonFile.java --- @@ -0,0 +1,445 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.core.datastore.filesystem; + +import java.io.BufferedInputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.List; + +import org.apache.carbondata.common.logging.LogService; +import org.apache.carbondata.common.logging.LogServiceFactory; +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datastore.impl.CarbonS3FileSystem; +import org.apache.carbondata.core.datastore.impl.FileFactory; +import org.apache.carbondata.core.util.CarbonUtil; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.io.compress.BZip2Codec; +import org.apache.hadoop.io.compress.CompressionCodec; +import org.apache.hadoop.io.compress.CompressionCodecFactory; +import org.apache.hadoop.io.compress.GzipCodec; + +public class S3CarbonFile implements CarbonFile { + + private static final LogService LOGGER = + LogServiceFactory.getLogService(S3CarbonFile.class.getName()); + private static Configuration configuration = null; + + static { + configuration = new Configuration(); --- End diff -- I think we should use the Configuration object from the job instead of creating it ourselves --- |
In reply to this post by qiuchenjian-2
Github user QiangCai commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1584#discussion_r158453724 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/filesystem/S3CarbonFile.java --- @@ -0,0 +1,445 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.core.datastore.filesystem; + +import java.io.BufferedInputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.List; + +import org.apache.carbondata.common.logging.LogService; +import org.apache.carbondata.common.logging.LogServiceFactory; +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datastore.impl.CarbonS3FileSystem; +import org.apache.carbondata.core.datastore.impl.FileFactory; +import org.apache.carbondata.core.util.CarbonUtil; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.io.compress.BZip2Codec; +import org.apache.hadoop.io.compress.CompressionCodec; +import org.apache.hadoop.io.compress.CompressionCodecFactory; +import org.apache.hadoop.io.compress.GzipCodec; + +public class S3CarbonFile implements CarbonFile { + + private static final LogService LOGGER = + LogServiceFactory.getLogService(S3CarbonFile.class.getName()); + private static Configuration configuration = null; + + static { + configuration = new Configuration(); --- End diff -- Carbon should support to get configuration from user session (like SparkSession) to support multiple users. Different users can use different S3 credential in the same cluster. --- |
In reply to this post by qiuchenjian-2
Github user QiangCai commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1584#discussion_r158614292 --- Diff: core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java --- @@ -1332,7 +1337,7 @@ */ @CarbonProperty public static final String CARBON_UPDATE_SEGMENT_PARALLELISM = - "carbon.update.segment.parallelism"; + "carbon.update.segment.parallelism"; --- End diff -- incorrect indentation --- |
In reply to this post by qiuchenjian-2
Github user QiangCai commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1584#discussion_r158614764 --- Diff: core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java --- @@ -762,14 +762,12 @@ public static String checkAndAppendFileSystemURIScheme(String filePath) { } private static boolean checkIfPrefixExists(String path) { - final String lowerPath = path.toLowerCase(Locale.getDefault()); - return lowerPath.startsWith(CarbonCommonConstants.HDFSURL_PREFIX) || - lowerPath.startsWith(CarbonCommonConstants.VIEWFSURL_PREFIX) || - lowerPath.startsWith(CarbonCommonConstants.LOCAL_FILE_PREFIX) || - lowerPath.startsWith(CarbonCommonConstants.ALLUXIOURL_PREFIX) || - lowerPath.startsWith(CarbonCommonConstants.S3N_PREFIX) || - lowerPath.startsWith(CarbonCommonConstants.S3_PREFIX) || - lowerPath.startsWith(CarbonCommonConstants.S3A_PREFIX); + final String lowerPath = path.toLowerCase(); + return lowerPath.startsWith(CarbonCommonConstants.HDFSURL_PREFIX) || lowerPath --- End diff -- better to move "lowerPath" to next row --- |
In reply to this post by qiuchenjian-2
Github user QiangCai commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1584#discussion_r158614229 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/impl/FileFactory.java --- @@ -360,9 +361,10 @@ public static long getDirectorySize(String filePath) throws IOException { case ALLUXIO: case VIEWFS: case S3: - Path path = new Path(filePath); - FileSystem fs = path.getFileSystem(configuration); - return fs.getContentSummary(path).getLength(); + Path s3Path = new Path(filePath); --- End diff -- suggest moving S3 case into seperated block case HDFS: case ALLUXIO: case VIEWFS: ..... old code case S3: .....new code --- |
In reply to this post by qiuchenjian-2
Github user QiangCai commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1584#discussion_r158614782 --- Diff: core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java --- @@ -762,14 +762,12 @@ public static String checkAndAppendFileSystemURIScheme(String filePath) { } private static boolean checkIfPrefixExists(String path) { - final String lowerPath = path.toLowerCase(Locale.getDefault()); - return lowerPath.startsWith(CarbonCommonConstants.HDFSURL_PREFIX) || - lowerPath.startsWith(CarbonCommonConstants.VIEWFSURL_PREFIX) || - lowerPath.startsWith(CarbonCommonConstants.LOCAL_FILE_PREFIX) || - lowerPath.startsWith(CarbonCommonConstants.ALLUXIOURL_PREFIX) || - lowerPath.startsWith(CarbonCommonConstants.S3N_PREFIX) || --- End diff -- why remove S3N_PREFIX and S3_PREFIX ? --- |
In reply to this post by qiuchenjian-2
Github user QiangCai commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1584#discussion_r158615665 --- Diff: core/src/main/java/org/apache/carbondata/core/datastore/impl/CarbonS3FileSystem.java --- @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.core.datastore.impl; + +import java.io.BufferedOutputStream; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.net.URI; +import static java.nio.file.Files.createDirectories; +import static java.nio.file.Files.createTempFile; +import static java.util.Objects.requireNonNull; + +import org.apache.carbondata.core.util.CarbonProperties; +import static org.apache.carbondata.core.constants.CarbonCommonConstants.PATH_SEPARATOR; +import static org.apache.carbondata.core.constants.CarbonCommonConstants.S3_STAGING_DIRECTORY; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.fs.s3a.S3AFileSystem; +import org.apache.hadoop.util.Progressable; + +import static org.apache.hadoop.fs.s3a.Constants.ACCESS_KEY; +import static org.apache.hadoop.fs.s3a.Constants.MIN_MULTIPART_THRESHOLD; +import static org.apache.hadoop.fs.s3a.Constants.MULTIPART_SIZE; +import static org.apache.hadoop.fs.s3a.Constants.SECRET_KEY; + +public class CarbonS3FileSystem extends S3AFileSystem { + + private URI uri; + private File stagingDirectory; + + @Override public void initialize(URI uri, Configuration conf) throws IOException { + requireNonNull(uri, "uri is null"); + requireNonNull(conf, "conf is null"); + setConf(conf); + this.uri = URI.create(uri.getScheme() + "://" + uri.getAuthority()); + new Path(PATH_SEPARATOR).makeQualified(this.uri, new Path(PATH_SEPARATOR)); + + CarbonProperties defaults = CarbonProperties.getInstance(); + if (defaults.getProperty(ACCESS_KEY) != null) { --- End diff -- no need to support AKSK configuration in carbon.properties. --- |
In reply to this post by qiuchenjian-2
Github user QiangCai commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1584#discussion_r158614858 --- Diff: core/pom.xml --- @@ -94,6 +99,20 @@ </exclusion> </exclusions> </dependency> + + <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws --> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-aws</artifactId> --- End diff -- can Carbondata only use Hadoop API to support S3ï¼ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/1584 Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/2396/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/1584 Build Failed with Spark 2.2.0, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/1173/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/1584 Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/2401/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/1584 Build Failed with Spark 2.2.0, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/1178/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/1584 Build Failed with Spark 2.1.0, Please check CI http://136.243.101.176:8080/job/ApacheCarbonPRBuilder1/2406/ --- |
In reply to this post by qiuchenjian-2
Github user CarbonDataQA commented on the issue:
https://github.com/apache/carbondata/pull/1584 Build Failed with Spark 2.2.0, Please check CI http://88.99.58.216:8080/job/ApacheCarbonPRBuilder/1182/ --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/1584 SDV Build Fail , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/2597/ --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/1584 SDV Build Success , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/2599/ --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/1584 SDV Build Fail , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/2601/ --- |
In reply to this post by qiuchenjian-2
Github user ravipesala commented on the issue:
https://github.com/apache/carbondata/pull/1584 SDV Build Fail , Please check CI http://144.76.159.231:8080/job/ApacheSDVTests/2603/ --- |
Free forum by Nabble | Edit this page |