// Copyright 2013, University of Freiburg,
// Chair of Algorithms and Data Structures.
// Author: Hannah Bast <bast@cs.uni-freiburg.de>.

// NOTE: this is a code design suggestion in pseudo-code. It is not supposed to
// be compilable in any language. You have to translate it to Java or C++
// yourself. The purpose of this file is to suggest a basic design and settle
// questions you might have on what exactly your code is supposed to do.

// Implementation of a very simple inverted index. Each word maps to an
// inverted list of postings (document id plus score), and queries are answered
// by intersecting the inverted lists of the query words.
class InvertedIndex {
  // PUBLIC MEMBERS

  // Create an empty index.
  InvertedIndex();

  // Build the inverted lists from the given text file. The expected format of
  // the file is one line per document, with each line containing the text of
  // that document without newlines.
  //
  // NEW(lecture-02): Still convert all words to lower case, but no longer
  // ignore multiple occurrences of the same word in one document. Instead
  // compute BM25 scores for the given parameters bm25k and bm25b. For this
  // proceed in two steps:
  //
  // 1. First store the postings only with simple tf scores (tf = term
  //    frequency = number of occurrences of the word in the document). This
  //    is easy: just add +1 to the score when you see the word again.
  //
  // 2. When all the inverted lists are done, transform the tf scores of each
  //    inverted list into BM25 scores. Note that df (document frequency = the
  //    number of documents containing the word) is simply the number of
  //    postings in the inverted list of that word.
  //
  // NOTE(review): presumably this is also where `documents` gets filled, with
  // the i-th line of the file becoming the document with id i (counting from
  // 0) -- confirm.
  void buildFromTextFile(String fileName, float bm25k, float bm25b);

  // Given a query, find all documents that contain all of the words in the
  // query and return at most k of them, for the given value of k.
  //
  // Before lecture-02: the result was sorted by document id, and if there were
  // more than k matching documents, only the first k in that order were
  // returned.
  //
  // NEW(lecture-02): as before, but now (partially) sort the result list by
  // score and return the k documents with the top k scores. When two documents
  // have the same score, rank the one with the smaller document id first.
  //
  // NOTE(review): the returned strings are presumably the texts of the
  // matching documents, taken from `documents` -- confirm.
  Array<String> processQuery(String query, int k);

  // PRIVATE MEMBERS

  // Intersect two inverted lists, each sorted by document id. Returns the
  // result list, which is again sorted by document id and contains one posting
  // for each document id that occurs in both input lists.
  //
  // NEW(lecture-02): the lists now contain postings instead of plain integers.
  // Intersect on the document ids as before, but now when you find a document
  // id that occurs in both lists, add up the scores, and write the sum into
  // the result posting.
  Array<Posting> intersect(Array<Posting> list1, Array<Posting> list2);

  // The inverted lists, one per word, with the word as key.
  //
  // NEW(lecture-02): an inverted list is now an array of postings, where each
  // posting is a pair of document id and score; see Posting.TIP. The lists are
  // still sorted by document id.
  Map<String, Array<Posting>> invertedLists;

  // The text of all the documents, indexed by document id. Since document ids
  // are consecutive, starting at 0, a simple array suffices.
  Array<String> documents;
}