Import from DB-Pedia

DBPedia exports all the entities as GZipped CSV files. Features:

  • The first line contains the column names; the second, third and fourth lines contain meta information, which we’ll skip (look at "skipFrom": 1, "skipTo": 3 in the CSV extractor)
  • The vertex class name is derived automatically from the file name, so we can use the same configuration against any DBPedia file
  • The Primary Key is the “URI” field, where a UNIQUE index has also been created (refer to “ORIENTDB” loader)
  • The “merge” transformer is used so that any file can be re-imported or updated without generating duplicates

Configuration

  1. {
  2. "config": {
  3. "log": "debug",
  4. "fileDirectory": "/temp/databases/dbpedia_csv/",
  5. "fileName": "Person.csv.gz"
  6. },
  7. "begin": [
  8. { "let": { "name": "$filePath", "value": "$fileDirectory.append( $fileName )"} },
  9. { "let": { "name": "$className", "value": "$fileName.substring( 0, $fileName.indexOf('.') )"} }
  10. ],
  11. "source" : {
  12. "file": { "path": "$filePath", "lock" : true }
  13. },
  14. "extractor" : {
  15. "csv": { "separator": ",", "nullValue": "NULL", "skipFrom": 1, "skipTo": 3 }
  16. },
  17. "transformers" : [
  18. { "merge": { "joinFieldName":"URI", "lookup":"V.URI" } },
  19. { "vertex": { "class": "$className"} }
  20. ],
  21. "loader" : {
  22. "orientdb": {
  23. "dbURL": "plocal:/temp/databases/dbpedia",
  24. "dbUser": "admin",
  25. "dbPassword": "admin",
  26. "dbAutoCreate": true,
  27. "tx": false,
  28. "batchCommit": 1000,
  29. "dbType": "graph",
  30. "indexes": [{"class":"V", "fields":["URI:string"], "type":"UNIQUE" }]
  31. }
  32. }
  33. }