最終更新: haruyama_seigo 2013年03月12日(火) 11:22:02履歴
Solr 4.2 の example ディレクトリで以下を実行
% java -jar start.jar
した際の index file の形式を見る
以下のクラスでシリアライズ, デシリアライズされる
- org.apache.lucene.store.DataInput
- org.apache.lucene.store.DataOutput
% od -A x -t x1 index/segments.gen 000000 ff ff ff fe 00 00 00 00 00 00 00 01 00 00 00 00 000010 00 00 00 01 000014
- GenHeader
- ff ff ff fe
- org.apache.lucene.index.SegmentInfos.FORMAT_SEGMENTS_GEN_CURRENT(-2)
- ff ff ff fe
- Generation
- 00 00 00 00 00 00 00 01
- Generation
- 00 00 00 00 00 00 00 01
000000 3f d7 6c 17 08 73 65 67 6d 65 6e 74 73 00 00 00 000010 00 00 00 00 00 00 00 00 01 00 00 00 00 00 00 00 000020 00 00 00 00 00 00 00 00 00 90 80 db ff 00002d
- Header
- 3f d7 6c 17
- org.apache.lucene.codecs.CodecUtil.CODEC_MAGIC
- 08 73 65 67 6d 65 6e 74 73
- "segments"
- 00 00 00 00
- org.apache.lucene.index.SegmentInfos.VERSION_40
- 3f d7 6c 17
- Version
- 00 00 00 00 00 00 00 01
- NameCounter
- 00 00 00 00
- SegCount
- 00 00 00 00
- CommitUserData
- 00 00 00 00
- CheckSum
- 00 00 00 00 90 80 db ff
curl http://localhost:8983/solr/update?commit=true -H "Content-Type: text/xml" --data-binary '<add><doc><field name="id">testdoc</field></doc></add>'
<field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" /> <field name="_version_" type="long" indexed="true" stored="true"/>
- index/segments.gen
- index/segments_2
- index/_0.si
- index/_0.fdt
- index/_0.fdx
- index/_0.fnm
- index/_0_Lucene41_0.doc
- index/_0_Lucene41_0.tim
- index/_0_Lucene41_0.tip
- index/write.lock
- tlog/tlog.0000000000000000000
000000 3f d7 6c 17 08 73 65 67 6d 65 6e 74 73 00 00 00 000010 00 00 00 00 00 00 00 00 03 00 00 00 01 00 00 00 000020 01 02 5f 30 08 4c 75 63 65 6e 65 34 32 ff ff ff 000030 ff ff ff ff ff 00 00 00 00 00 00 00 01 0e 63 6f 000040 6d 6d 69 74 54 69 6d 65 4d 53 65 63 0d 31 33 36 000050 33 30 30 35 32 38 37 35 33 30 00 00 00 00 a0 ca 000060 af d9 000062
- Header
- 3f d7 6c 17
- org.apache.lucene.codecs.CodecUtil.CODEC_MAGIC
- 08 73 65 67 6d 65 6e 74 73
- "segments"
- 00 00 00 00
- org.apache.lucene.index.SegmentInfos.VERSION_40
- 3f d7 6c 17
- Version
- 00 00 00 00 00 00 00 03
- NameCounter
- 00 00 00 01
- SegCount
- 00 00 00 01
- SegName
- 02 5f 30
- _0
- 02 5f 30
- SegCodec
- 08 4c 75 63 65 6e 65 34 32
- "Lucene42"
- 08 4c 75 63 65 6e 65 34 32
- DelGen
- ff ff ff ff ff ff ff ff
- org.apache.lucene.index.SegmentInfoPerCommit.delGen
- Generation number of the live docs file (-1 if there are no deletes yet):
- ff ff ff ff ff ff ff ff
- DeletionCount
- 00 00 00 00
- CommitUserData
- size
- 00 00 00 01
- key
- 0e 63 6f 6d 6d 69 74 54 69 6d 65 4d 53 65 63
- "commitTimeMSec"
- value
- 0d 31 33 36 33 30 30 35 32 38 37 35 33 30
- "1363005287530"
- size
- CheckSum
- 00 00 00 00 a0 ca af d9
000000 3f d7 6c 17 18 4c 75 63 65 6e 65 34 31 53 74 6f 000010 72 65 64 46 69 65 6c 64 73 44 61 74 61 00 00 00 000020 00 01 00 01 02 12 f0 03 00 07 74 65 73 74 64 6f 000030 63 0c 13 d5 97 22 c5 30 00 00 00003a
- Header
- 3f d7 6c 17
- org.apache.lucene.codecs.CodecUtil.CODEC_MAGIC
- 18 4c 75 63 65 6e 65 34 31 53 74 6f 72 65 64 46 69 65 6c 64 73 44 61 74 61
- "Lucene41StoredFieldsData"
- 00 00 00 00
- version
- 3f d7 6c 17
- PackedIntsVersion
- 01
- org.apache.lucene.util.packed.PackedInts.VERSION_CURRENT
- 01
- DocBase
- 00
- ChunkDocs
- 01
- DocFieldCounts(org.apache.lucene.codecs.compressing.CompressingStoredFieldsReader L209)
- 02
- DocLengths
- 12
- org.apache.lucene.codecs.compressing.LZ4.decompress L85
- f0 03
- literalLen = (0xf0 >>> 4) + 0x03 = 0x12
- f0 03
- FieldNumAndType
- 00
- (String, 0)
- 00
- Value
- 07 74 65 73 74 64 6f 63
- "testdoc"
- 07 74 65 73 74 64 6f 63
- FieldNumAndType
- 0c(Long, 1)
- 13 d5 97 22 c5 30 00 00
- 0c(Long, 1)
000000 3f d7 6c 17 19 4c 75 63 65 6e 65 34 31 53 74 6f 000010 72 65 64 46 69 65 6c 64 73 49 6e 64 65 78 00 00 000020 00 00 01 01 00 00 01 00 22 00 01 00 00 00002d
- Header
- 3f d7 6c 17
- org.apache.lucene.codecs.CodecUtil.CODEC_MAGIC
- 19 4c 75 63 65 6e 65 34 31 53 74 6f 72 65 64 46 69 65 6c 64 73 49 6e 64 65 78
- "Lucene41StoredFieldsIndex"
- 00 00 00 00
- version
- 3f d7 6c 17
- PackedIntsVersion
- 01
- BlockChunks
- 01
- DocBase
- 00
- AvgChunkDocs
- 00
- BitsPerDocBaseDelta
- 01
- DocBaseDelta
- 00
- StartPointerBase
- 22
- _0.fdt の 0x22 から Documentの情報がある
- 22
- AvgChunkSize
- 00
- BitsPerStartPointerDelta
- 01
- StartPointerDeltas
- 00
- BlocksEndMarker
- 00
000000 3f d7 6c 17 12 4c 75 63 65 6e 65 34 32 46 69 65 000010 6c 64 49 6e 66 6f 73 00 00 00 00 02 02 69 64 00 000020 51 00 00 00 00 02 1d 50 65 72 46 69 65 6c 64 50 000030 6f 73 74 69 6e 67 73 46 6f 72 6d 61 74 2e 66 6f 000040 72 6d 61 74 08 4c 75 63 65 6e 65 34 31 1d 50 65 000050 72 46 69 65 6c 64 50 6f 73 74 69 6e 67 73 46 6f 000060 72 6d 61 74 2e 73 75 66 66 69 78 01 30 09 5f 76 000070 65 72 73 69 6f 6e 5f 01 51 00 00 00 00 02 1d 50 000080 65 72 46 69 65 6c 64 50 6f 73 74 69 6e 67 73 46 000090 6f 72 6d 61 74 2e 66 6f 72 6d 61 74 08 4c 75 63 0000a0 65 6e 65 34 31 1d 50 65 72 46 69 65 6c 64 50 6f 0000b0 73 74 69 6e 67 73 46 6f 72 6d 61 74 2e 73 75 66 0000c0 66 69 78 01 30 0000c5
- Header
- 3f d7 6c 17
- org.apache.lucene.codecs.CodecUtil.CODEC_MAGIC
- 12 4c 75 63 65 6e 65 34 32 46 69 65 6c 64 49 6e 66 6f 73
- "Lucene42FieldInfos"
- 00 00 00 00
- version
- 3f d7 6c 17
- FieldsCount
- 02
- FieldName
- 02 69 64
- "id"
- 02 69 64
- FieldNumber
- 00
- FieldBits
- 51 (1010001b)
- indexed, norm-omitted, term-freq and -pos omitted
- 51 (1010001b)
- DocValuesBits
- 00
- Attributes
- 00 00 00 02
- size
- "PerFieldPostingsFormat.format" : "Lucene41"
- "PerFieldPostingsFormat.suffix" : "0"
- 00 00 00 02
- FieldName
- 09 5f 76 65 72 73 69 6f 6e 5f
- "_version_"
- 09 5f 76 65 72 73 69 6f 6e 5f
- FieldNumber
- 01
- FieldBits
- 51 (1010001b)
- indexed, norm-omitted, term-freq and -pos omitted
- 51 (1010001b)
- DocValuesBits
- 00
- Attributes
- 00 00 00 02
- size
- "PerFieldPostingsFormat.format" : "Lucene41"
- "PerFieldPostingsFormat.suffix" : "0"
- 00 00 00 02
000000 3f d7 6c 17 13 4c 75 63 65 6e 65 34 30 53 65 67 000010 6d 65 6e 74 49 6e 66 6f 00 00 00 00 03 34 2e 32 000020 00 00 00 01 ff 00 00 00 08 09 74 69 6d 65 73 74 000030 61 6d 70 0d 31 33 36 33 30 30 35 32 38 37 35 35 000040 31 0a 6f 73 2e 76 65 72 73 69 6f 6e 0d 33 2e 32 000050 2e 30 2d 34 2d 61 6d 64 36 34 02 6f 73 05 4c 69 000060 6e 75 78 0e 6c 75 63 65 6e 65 2e 76 65 72 73 69 000070 6f 6e 2b 34 2e 32 2e 30 20 31 34 35 33 36 39 34 000080 20 2d 20 72 6d 75 69 72 20 2d 20 32 30 31 33 2d 000090 30 33 2d 30 36 20 32 32 3a 32 35 3a 32 39 06 73 0000a0 6f 75 72 63 65 05 66 6c 75 73 68 07 6f 73 2e 61 0000b0 72 63 68 05 61 6d 64 36 34 0c 6a 61 76 61 2e 76 0000c0 65 72 73 69 6f 6e 08 31 2e 37 2e 30 5f 30 33 0b 0000d0 6a 61 76 61 2e 76 65 6e 64 6f 72 12 4f 72 61 63 0000e0 6c 65 20 43 6f 72 70 6f 72 61 74 69 6f 6e 00 00 0000f0 00 00 00 00 00 07 06 5f 30 2e 66 6e 6d 11 5f 30 000100 5f 4c 75 63 65 6e 65 34 31 5f 30 2e 64 6f 63 06 000110 5f 30 2e 66 64 78 05 5f 30 2e 73 69 11 5f 30 5f 000120 4c 75 63 65 6e 65 34 31 5f 30 2e 74 69 6d 06 5f 000130 30 2e 66 64 74 11 5f 30 5f 4c 75 63 65 6e 65 34 000140 31 5f 30 2e 74 69 70 000147
- Header
- 3f d7 6c 17
- org.apache.lucene.codecs.CodecUtil.CODEC_MAGIC
- 13 4c 75 63 65 6e 65 34 30 53 65 67 6d 65 6e 74 49 6e 66 6f
- "Lucene40SegmentInfo"
- 00 00 00 00
- version
- 3f d7 6c 17
- SegVersion
- 03 34 2e 32
- "4.2"
- 03 34 2e 32
- SegSize
- 00 00 00 01
- IsCompoundFile
- ff(-1)
- Diagnostic
- 08
- size
- timestamp : ...
- os.version : ...
- os : Linux
- lucene.version: ...
- source : flush
- os.arch: amd64
- java.version: 1.7.0_03
- java.vendor: Oracle Corporation
- 08
- Attributes
- 00 00 00 00
- Files
- _0.fnm
- _0_Lucene41_0.doc
- _0.fdx
- _0.si
- _0_Lucene41_0.tim
- _0.fdt
- _0_Lucene41_0.tip
000000 3f d7 6c 17 19 4c 75 63 65 6e 65 34 31 50 6f 73 000010 74 69 6e 67 73 57 72 69 74 65 72 44 6f 63 00 00 000020 00 00 01 20 21 02 23 04 05 06 07 08 09 0a 0b 0c 000030 0d 0e 0f 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 000040 1d 1e 1f 000043
- Header
- 3f d7 6c 17
- org.apache.lucene.codecs.CodecUtil.CODEC_MAGIC
- 19 4c 75 63 65 6e 65 34 31 50 6f 73 74 69 6e 67 73 57 72 69 74 65 72 44 6f 63
- "Lucene41PostingsWriterDoc"
- 00 00 00 00
- version
- 3f d7 6c 17
- org.apache.lucene.codecs.lucene41.ForUtil L99, L125
- PackedIntsVersion
- 01
- code(0x20): formatId(1), bitsPerValue(1)
- 20
- code(0x21): formatId(1), bitsPerValue(2)
- 21
- Lucene41PostingsFormat (Lucene 4.1.0 API)
- Term Dictionary
000000 3f d7 6c 17 15 42 4c 4f 43 4b 5f 54 52 45 45 5f 000010 54 45 52 4d 53 5f 44 49 43 54 00 00 00 01 3f d7 000020 6c 17 1b 4c 75 63 65 6e 65 34 31 50 6f 73 74 69 000030 6e 67 73 57 72 69 74 65 72 54 65 72 6d 73 00 00 000040 00 00 80 01 03 19 0b 20 01 13 6a 65 64 2c 29 40 000050 00 00 01 01 01 00 03 11 07 74 65 73 74 64 6f 63 000060 01 01 01 00 02 01 01 02 92 02 01 01 00 01 02 da 000070 02 01 01 00 00 00 00 00 00 00 64 00007b
- Header
- 3f d7 6c 17
- org.apache.lucene.codecs.CodecUtil.CODEC_MAGIC
- 15 42 4c 4f 43 4b 5f 54 52 45 45 5f 54 45 52 4d 53 5f 44 49 43 54
- "BLOCK_TREE_TERMS_DICT"
- 00 00 00 01
- org.apache.lucene.codecs.BlockTreeTermsWriter.TERMS_VERSION_CURRENT(TERMS_VERSION_APPEND_ONLY)
- 3f d7 6c 17
- Postings Metadata
- 3f d7 6c 17
- org.apache.lucene.codecs.CodecUtil.CODEC_MAGIC
- 1b 4c 75 63 65 6e 65 34 31 50 6f 73 74 69 6e 67 73 57 72 69 74 65 72 54 65 72 6d 73
- "Lucene41PostingsWriterTerms"
- org.apache.lucene.codecs.lucene41.Lucene41PostingsWriter.TERMS_CODEC
- 00 00 00 00
- Version
- 80 01
- 128
- PackedBlockSize
- 3f d7 6c 17
- Block
- SuffixBlock
- 03
- block header. src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java L807
- 19
- Write suffixes byte[] blob to terms dict output. src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java L922
- 0b
- SuffixLength
- 20 01 13 6a 65 64 2c 29 40 00 00
- Byte^Suffixlength
- 03
- StatsBlock
- 01
- StatsLength
- 01 01
- <DocFreq, TotalTermFreq>
- 01
- MetadataBlock
- 00
- MetaLength
- 00
- Block
- SuffixBlock
- 03
- block header.
- 11
- Write suffixes byte[] blob to terms dict output.
- 07
- SuffixLength
- 74 65 73 74 64 6f 63
- Byte^Suffixlength("testdoc")
- 03
- StatsBlock
- 01
- StatsLength
- 01 01
- <DocFreq, TotalTermFreq>
- 01
- MetadataBlock
- 00
- MetaLength
- 00
- FieldSummary
- NumField
- 02
- FieldNumber
- 01
- NumTerms
- 01
- RootCodeLength
- 02
- Byte^RootCodeLength
- 92 02
- rootBlockFP(0x44) : field情報へのポインタ
- org.apache.lucene.codecs.BlockTreeTermsReader L475
- 92 02
- SumDocFreq
- 01
- DocCount
- 01
- FieldNumber
- 00
- NumTerms
- 01
- RootCodeLength
- 02
- Byte^RootCodeLength
- da 02
- rootBlockFP(0x56)
- da 02
- SumDocFreq
- 01
- DocCount
- 01
- DirOffset
- 00 00 00 00 00 00 00 64
000000 3f d7 6c 17 16 42 4c 4f 43 4b 5f 54 52 45 45 5f 000010 54 45 52 4d 53 5f 49 4e 44 45 58 00 00 00 01 3f 000020 d7 6c 17 03 46 53 54 00 00 00 04 00 01 03 02 92 000030 02 00 00 00 00 00 01 00 3f d7 6c 17 03 46 53 54 000040 00 00 00 04 00 01 03 02 da 02 00 00 00 00 00 01 000050 00 1f 38 00 00 00 00 00 00 00 51 00005b
- Header
- 3f d7 6c 17
- org.apache.lucene.codecs.CodecUtil.CODEC_MAGIC
- 16 42 4c 4f 43 4b 5f 54 52 45 45 5f 54 45 52 4d 53 5f 49 4e 44 45 58
- "BLOCK_TREE_TERMS_INDEX"
- 00 00 00 01
- org.apache.lucene.codecs.BlockTreeTermsWriter.TERMS_INDEX_VERSION_CURRENT(TERMS_INDEX_VERSION_APPEND_ONLY)
- 3f d7 6c 17
- FSTIndex^NumFields
- Postings Metadata
- 3f d7 6c 17
- org.apache.lucene.codecs.CodecUtil.CODEC_MAGIC
- 03 46 53 54
- "FST"
- 00 00 00 04
- 3f d7 6c 17
- packed?
- 00
- emptyOutput != null
- 01
- emptyOutputBytes.length
- 03
- emptyOutputBytes
- 02 92 02
- inputType
- 00
- INPUT_TYPE.BYTE1
- 00
- startNode
- 00
- nodeCount
- 00
- arcCount
- 00
- arcWithOutputCount
- 00
- bytes.length
- 01
- bytes
- 00
- Postings Metadata
- 3f d7 6c 17
- org.apache.lucene.codecs.CodecUtil.CODEC_MAGIC
- 03 46 53 54
- "FST"
- 00 00 00 04
- 3f d7 6c 17
- packed?
- 00
- emptyOutput != null
- 01
- emptyOutputBytes.length
- 03
- emptyOutputBytes
- 02 da 02
- inputType
- 00
- INPUT_TYPE.BYTE1
- 00
- startNode
- 00
- nodeCount
- 00
- arcCount
- 00
- arcWithOutputCount
- 00
- bytes.length
- 01
- bytes
- 00
- IndexStartFP
- 1f 38
- _0_Lucene41_0.tip 内でのポインタ
- 1f 38
- DirOffset
- 00 00 00 00 00 00 00 51
000000 02 0a 02 29 53 4f 4c 52 5f 54 4c 4f 47 41 27 73 000010 74 72 69 6e 67 73 82 22 69 64 29 5f 76 65 72 73 000020 69 6f 6e 5f 00 00 00 24 83 41 07 13 d5 97 22 c5 000030 30 00 00 10 02 08 3f 80 00 00 e1 27 74 65 73 74 000040 64 6f 63 e2 07 13 d5 97 22 c5 30 00 00 00 00 00 000050 25 83 44 60 2d 53 4f 4c 52 5f 54 4c 4f 47 5f 45 000060 4e 44 00 00 00 11 000066
- org.apache.solr.common.util.JavaBinCodec.VERSION
- 02
- NL
- 0a
- writeTag(MAP, 2)
- 02
- "SOLR_TLOG"
- writeTag(STR, sz)
- 29 ( 0x20 + 0x09)
- 53 4f 4c 52 5f 54 4c 4f 47
- writeTag(STR, sz)
- writeInt(1)
- 41
- "strings"
- writeTag(STR, sz)
- 27
- 73 74 72 69 6e 67 73
- writeTag(ARR, 2)
- 82
- writeTag(STR, sz)
- "id"
- 22
- 69 64
- "_version_"
- 29
- 5f 76 65 72 73 69 6f 6e 5f
- org.apache.solr.common.util.JavaBinCodec.VERSION
- 02
- NL
- 0a
- writeTag(MAP, 2)
- 02
- endRecord()
- 00 00 00 24
- writeTag(ARR, 3)
- 83
- SINT(1)
- 41
- LONG(07)
- 07
- 13 d5 97 22 c5 30 00 00
- SOLRINPUTDOC(16)
- 10
- size
- 02
- docBoost(FLOAT)
- 08 3f 80 00 00
- fieldName("id")
- e1 (EXTERN_STRING)
- fieldVal("testdoc")
- 27 74 65 73 74 64 6f 63
- fieldName("_version_")
- e2 (EXTERN_STRING)
- fieldVal
- 07 13 d5 97 22 c5 30 00 00
- endRecord()
- 00 00 00 25
- codec.writeTag(JavaBinCodec.ARR, 3)
- 83
- codec.writeInt(UpdateLog.COMMIT | flags)
- 44 (SINT(0x40) + UpdateLog.COMMIT (0x04))
- codec.writeLong(cmd.getVersion())
- 60 (SLONG(0x60))
- "SOLR_TLOG_END"
- 2d
- 53 4f 4c 52 5f 54 4c 4f 47 5f 45 4e 44
- endRecord()
- 00 00 00 11
- カテゴリ:
- インターネット
- インターネットセキュリティ
最新コメント