diff --git a/mongoimport/mongoimport.go b/mongoimport/mongoimport.go index 2b2fde2..dee7ff0 100644 --- a/mongoimport/mongoimport.go +++ b/mongoimport/mongoimport.go @@ -601,7 +601,7 @@ func (imp *MongoImport) getInputReader(in io.Reader) (InputReader, error) { if imp.InputOptions.Type == CSV { return NewCSVInputReader(colSpecs, in, out, imp.IngestOptions.NumDecodingWorkers, ignoreBlanks), nil } else if imp.InputOptions.Type == TSV { - return NewTSVInputReader(colSpecs, in, out, imp.IngestOptions.NumDecodingWorkers, ignoreBlanks), nil + return NewTSVInputReader(colSpecs, in, out, imp.IngestOptions.NumDecodingWorkers, ignoreBlanks, imp.InputOptions.TsvDelim), nil } return NewJSONInputReader(imp.InputOptions.JSONArray, in, imp.IngestOptions.NumDecodingWorkers), nil } diff --git a/mongoimport/options.go b/mongoimport/options.go index 1e59ad4..d62aa66 100644 --- a/mongoimport/options.go +++ b/mongoimport/options.go @@ -29,6 +29,9 @@ type InputOptions struct { // Specifies the file type to import. The default format is JSON, but it’s possible to import CSV and TSV files. Type string `long:"type" value-name:"" default:"json" default-mask:"-" description:"input format to import: json, csv, or tsv (defaults to 'json')"` + // Specifies the field delimiter to use when importing TSV files. The default is a tab character. + TsvDelim string `long:"tsvDelim" value-name:"" default:"\t" default-mask:"-" description:"input delimiter for tsv (defaults to '\t')"` + // Indicates that field names include type descriptions ColumnsHaveTypes bool `long:"columnsHaveTypes" description:"indicated that the field list (from --fields, --fieldsFile, or --headerline) specifies types; They must be in the form of '.()'. The type can be one of: auto, binary, bool, date, date_go, date_ms, date_oracle, double, int32, int64, string. For each of the date types, the argument is a datetime layout string. For the binary type, the argument can be one of: base32, base64, hex. 
All other types take an empty argument. Only valid for CSV and TSV imports. e.g. zipcode.string(), thumbnail.binary(base64)"` } diff --git a/mongoimport/tsv.go b/mongoimport/tsv.go index 09aadc6..dcd45d3 100644 --- a/mongoimport/tsv.go +++ b/mongoimport/tsv.go @@ -11,7 +11,6 @@ import ( const ( entryDelimiter = '\n' - tokenSeparator = "\t" ) // TSVInputReader is a struct that implements the InputReader interface for a @@ -41,6 +40,9 @@ type TSVInputReader struct { // ignoreBlanks is whether empty fields should be ignored ignoreBlanks bool + + // tokenSeparator is the delimiter used to split each line into fields + tokenSeparator string } // TSVConverter implements the Converter interface for TSV input. @@ -50,11 +52,12 @@ type TSVConverter struct { index uint64 ignoreBlanks bool rejectWriter io.Writer + tokenSeparator string } // NewTSVInputReader returns a TSVInputReader configured to read input from the // given io.Reader, extracting the specified columns only. -func NewTSVInputReader(colSpecs []ColumnSpec, in io.Reader, rejects io.Writer, numDecoders int, ignoreBlanks bool) *TSVInputReader { +func NewTSVInputReader(colSpecs []ColumnSpec, in io.Reader, rejects io.Writer, numDecoders int, ignoreBlanks bool, tokenSeparator string) *TSVInputReader { szCount := newSizeTrackingReader(newBomDiscardingReader(in)) return &TSVInputReader{ colSpecs: colSpecs, @@ -64,6 +67,7 @@ func NewTSVInputReader(colSpecs []ColumnSpec, in io.Reader, rejects io.Writer, n numDecoders: numDecoders, sizeTracker: szCount, ignoreBlanks: ignoreBlanks, + tokenSeparator: tokenSeparator, } } @@ -74,7 +78,7 @@ func (r *TSVInputReader) ReadAndValidateHeader() (err error) { if err != nil { return err } - for _, field := range strings.Split(header, tokenSeparator) { + for _, field := range strings.Split(header, r.tokenSeparator) { r.colSpecs = append(r.colSpecs, ColumnSpec{ Name: strings.TrimRight(field, "\r\n"), Parser: new(FieldAutoParser), @@ -91,7 +95,7 @@ func (r *TSVInputReader) ReadAndValidateTypedHeader(parseGrace ParseGrace) (err 
return err } var headerFields []string - for _, field := range strings.Split(header, tokenSeparator) { + for _, field := range strings.Split(header, r.tokenSeparator) { headerFields = append(headerFields, strings.TrimRight(field, "\r\n")) } r.colSpecs, err = ParseTypedHeaders(headerFields, parseGrace) @@ -129,6 +133,7 @@ func (r *TSVInputReader) StreamDocument(ordered bool, readDocs chan bson.D) (ret index: r.numProcessed, ignoreBlanks: r.ignoreBlanks, rejectWriter: r.tsvRejectWriter, + tokenSeparator: r.tokenSeparator, } r.numProcessed++ } @@ -147,7 +152,7 @@ func (r *TSVInputReader) StreamDocument(ordered bool, readDocs chan bson.D) (ret func (c TSVConverter) Convert() (b bson.D, err error) { b, err = tokensToBSON( c.colSpecs, - strings.Split(strings.TrimRight(c.data, "\r\n"), tokenSeparator), + strings.Split(strings.TrimRight(c.data, "\r\n"), c.tokenSeparator), c.index, c.ignoreBlanks, )