Thursday 11 April 2013

Handling Large List with Content Iterator - SharePoint 2010

 1. Reading all items in Large List


In SharePoint 2010, when you execute SPQuery on Large List, you get exception "The attempted operation is prohibited because it exceeds the list view threshold enforced by the administrator". To avoid this exception and read list items by batch we can use Content Iterator.

There are lot of methods available with ContentIterator, here we discuss about http://msdn.microsoft.com/en-us/library/ee560760%28v=office.14%29.aspx

To use ContentIterator include Microsoft.Office.Server.dll available in 14/ISAPI/ and include namespace Microsoft.Office.Server.Utilities.

Advantage:

  •  Fetches list items as a batch, load is reduced.
  •  If indexed column condition return more value than List View Threshold, it handles by batch. Normal SPQuery fails in this condition.
  •  We can stop batch processing any time.

Disadvantage:

  • You cannot include Non-Indexed column in SPQuery condition.

//Run as console application


using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Microsoft.SharePoint;
using Microsoft.Office.Server.Utilities;

namespace ContentIteratorListItemCollBatch
{
    class Sample
    {
        static int NumberOfBatch = 0, NumberOfItemsRead = 0, NumberOfException = 0;

        static void Main(string[] args)
        {
            using (SPSite site = new SPSite("your site url"))
            {
                using (SPWeb web = site.OpenWeb())
                {
                    SPList list = web.GetList("Lists/LargeList/AllItems.aspx"); //your list url
                    ContentIterator ci = new ContentIterator("Reading All Items");

                    SPQuery qry = new SPQuery();
                    qry.QueryThrottleMode = SPQueryThrottleOption.Strict; //Ensuring that all users come under List View Threshold. (Including farm admins / box administrators).

                    qry.RowLimit = 2000; //Number of Items read in a batch. But it should be less than List View Threshold.
                   
                    qry.Query = qry.Query + ContentIterator.ItemEnumerationOrderByID; //Not Required, Include for faster output. 
                    //Don't use ContentIterator.ItemEnumerationOrderByNVPField, it gets into infinite loop.

                    ci.ProcessListItems(list, qry, ProcessItemColl, ProcessErrorColl);
                    Console.WriteLine("\nBatch count: " + NumberOfBatch + "\n\nTotal number of items read: " + NumberOfItemsRead);
                    Console.ReadLine();
                }
            }
        }

        static public bool ProcessErrorColl(SPListItemCollection itemColl, Exception e)
        {
            // process the error
            NumberOfException++;
            return true;
        }
        static public void ProcessItemColl(SPListItemCollection itemColl)
        {
            //Work on the ListItem Collection object with your own condition
            //foreach (SPListItem item in itemColl)
            //{

            //}
            Console.WriteLine("Number of Items Read: " + itemColl.Count);
            NumberOfBatch++;
            NumberOfItemsRead += itemColl.Count;
        }
    }
}

My large list contains 25,000 items. you can see from output, that it read 25,000 items by batch of 2000 items.

Output


Number of Items Read: 2000
Number of Items Read: 2000
Number of Items Read: 2000
Number of Items Read: 2000
Number of Items Read: 2000
Number of Items Read: 2000
Number of Items Read: 2000
Number of Items Read: 2000
Number of Items Read: 2000
Number of Items Read: 2000
Number of Items Read: 2000
Number of Items Read: 2000
Number of Items Read: 1000

Batch count: 13

Total number of items read: 25000

2. Reading items in a Large List with condition


Ensure following conditions have been met.
  • Only Indexed column is allowed in where condition.
  • You should include ContentIterator.ItemEnumerationOrderByNVPField.
In below code, Title is a indexed column. As soon as custom list is created, title is made as index column.

//Run as Console Application


using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Microsoft.SharePoint;
using Microsoft.Office.Server.Utilities;

namespace ContentIteratorListItemCollBatch
{
    class Sample
    {
        static int NumberOfBatch = 0, NumberOfItemsRead = 0, NumberOfException = 0;

        static void Main(string[] args)
        {
            using (SPSite site = new SPSite("your site url"))
            {
                using (SPWeb web = site.OpenWeb())
                {
                    SPList list = web.GetList("Lists/LargeList/AllItems.aspx"); //your list url
                    ContentIterator ci = new ContentIterator("Reading All Items");

                    SPQuery qry = new SPQuery();
                    qry.QueryThrottleMode = SPQueryThrottleOption.Strict; //Ensuring that all users come under List View Threshold. (Including farm admins / box administrators).

                    qry.RowLimit = 2000; //Number of Items read in a batch. But it should be less than List View Threshold.

                    qry.Query = @"<Where><Contains><FieldRef Name='Title' /><Value Type='Text'>9</Value></Contains></Where>";


                    qry.Query = qry.Query + ContentIterator.ItemEnumerationOrderByNVPField;
                    //Have to include this line.

                    ci.ProcessListItems(list, qry, ProcessItemColl, ProcessErrorColl);
                    Console.WriteLine("\nBatch count: " + NumberOfBatch + "\n\nTotal number of items read: " + NumberOfItemsRead);
                    Console.ReadLine();
                }
            }
        }

        static public bool ProcessErrorColl(SPListItemCollection itemColl, Exception e)
        {
            // process the error
            NumberOfException++;
            return true;
        }
        static public void ProcessItemColl(SPListItemCollection itemColl)
        {
            //Work on the ListItem Collection object with your own condition
            //foreach (SPListItem item in itemColl)
            //{

            //}
            Console.WriteLine("Number of Items Read: " + itemColl.Count);
            NumberOfBatch++;
            NumberOfItemsRead += itemColl.Count;
        }
    }
}

In SPQuery, If indexed field fetches more than List View Threshold limit, it will fail. The ContentIterator handles it by batch processing.

 Output

Number of Items Read: 2000
Number of Items Read: 2000
Number of Items Read: 2000
Number of Items Read: 2000
Number of Items Read: 233

Batch count: 5

Total number of items read: 8233

8 comments:

  1. Hi. Thanks for your great post. I'm interested by the Content Iterator. We've used it to count the number of items corresponding to a criteria in a list containing 60,000 items. All fields used in the SPQuery are indexed. However when running our code on the list the content iterator doesn't seem to take items in batches (even with a RowLimit of 500) as it throws the famous Exception "The attempted operation is prohibited because it exceeds the list view threshold enforced by the administrator.

    We pass an SPQuery to the Content Iterator (as we don't need all the items but only those corresponding to our criteria), we know that the CAML specified in the SPQuery should return more than the threshold (about 40,000 out of 60,000 items). It that the reason why the Content Iterator throws the exception ?

    ReplyDelete
    Replies
    1. Use the second one. Just check qry.Query = qry.Query + ContentIterator.ItemEnumerationOrderByNVPField; is included. Without this, content iterator with condition will not work.

      I am able to successfully execute.

      Delete
    2. ContentIterator.ItemEnumerationOrderByNVPField has been used. However the number of rows in the NVP table is over the threshold so I think that's why the 'order by' fails. In other words 40,000 items have the criteria in my CAML. It's like in your example you'd have 20,001 items with 'Title contains 9' (as your threshold is 20,000). Could you please test and confirm?

      Delete
    3. Content Iterator will not fail if returned result exceeds List View Threshold, it will execute all items by batch.

      I am able to execute list with 30000 items. Use the above code, its executing fine.


      Number of Items Read: 2000
      Number of Items Read: 2000
      Number of Items Read: 2000
      Number of Items Read: 2000
      Number of Items Read: 2000
      Number of Items Read: 2000
      Number of Items Read: 2000
      Number of Items Read: 2000
      Number of Items Read: 2000
      Number of Items Read: 2000
      Number of Items Read: 2000
      Number of Items Read: 2000
      Number of Items Read: 2000
      Number of Items Read: 2000
      Number of Items Read: 2000

      Batch count: 15

      Total number of items read: 30000

      Delete
    4. Are you sure you are not logged in with a server administrator when you run this ?
      Read this post http://squarepoint.blogspot.fr/2013/05/creating-performant-sharepoint-apps.html and you can see he says " if your indexed field has more than 5000 entries in the index table (NameValuePair_Latin1_General_CI_AS or NameValuePair), you'll still get throttled."

      Delete
    5. Below are the steps i followed:

      1. Created a custom list.
      2. Created one extra Multiple line of text column(Just for testing purpose).
      3. Create primary index on Title column.
      4. Now add data from 900001 to 930000.
      5. Execute ContentIterator with contains '9'.
      6. Web part returned the results as expected.

      I executed it as readonly user only. I dont see throttle exception, let me know if i am missing some thing.

      - Prasanna.

      Delete
  2. Hi,

    I am able to get the data beyond the threshold limit in case of only one condition in spquery. But it fails when you add more than one conditions in spquery (all cols are index cols in condition).

    What to do?

    ReplyDelete
  3. http://prasannabj.blogspot.com/2013/04/handling-large-list-with-content.html

    Error as below
    Unhandled Exception: Microsoft.SharePoint.SPQueryThrottledException: The attempt
    ed operation is prohibited because it exceeds the list view threshold enforced b
    y the administrator. ---> System.Runtime.InteropServices.COMException: The attem
    pted operation is prohibited because it exceeds the list view threshold enforced
    by the administrator.0x80070024


    I follow your code but modify a bit as like this. please review code to what is wrong.
    Note: Both DocType and Title have already been created as index column.

    static void ListLargeItemByTitle(string docType, string title)
    {

    using (SPSite site = new SPSite(ConfigurationManager.AppSettings["SiteUrl"]))
    {
    using (SPWeb web = site.OpenWeb())
    {
    SPList list = web.GetList(ConfigurationManager.AppSettings["LibUrl"]);
    ContentIterator itemsIterator = new ContentIterator("Get Item By Title");

    SPQuery qry = new SPQuery();
    qry.QueryThrottleMode = SPQueryThrottleOption.Strict;
    qry.RowLimit = 2000;
    qry.ViewAttributes = "Scope=\"Recursive\"";


    qry.Query =string.Format(@"{0}
    {1}",docType,title);
    qry.Query =qry.Query+ContentIterator.ItemEnumerationOrderByNVPField;


    itemsIterator.ProcessListItems(list, qry, ProcessItemColl, ProcessErrorColl);

    }//web

    }//site

    }

    static public bool ProcessErrorColl(SPListItemCollection itemColl, Exception e)
    {
    // process the error

    return true;
    }
    static public void ProcessItemColl(SPListItemCollection itemColl)
    {
    //Work on the ListItem Collection object with your own condition
    foreach (SPListItem item in itemColl)
    {
    Console.WriteLine(item.Name + "-" + item.Url);
    }


    }


    However I apply traditional way as below. It work well.
    static void ListItemByFileLeafRef( string docType, string jpgFile, string pdfFile)
    {
    using (SPSite site = new SPSite(ConfigurationManager.AppSettings["SiteUrl"]))
    {
    using (SPWeb web = site.OpenWeb())
    {
    SPList list = web.GetList(ConfigurationManager.AppSettings["LibUrl"]);

    SPQuery qry = new SPQuery();

    qry.ViewAttributes = "Scope=\"Recursive\"";
    qry.RowLimit = 2000;


    qry.Query= string.Format(@"
    {0}
    {1}
    {2}",docType,jpgFile,pdfFile);

    SPListItemCollection collListItems = list.GetItems(qry);


    int i = 0;
    foreach (SPListItem item in collListItems)
    {
    Console.WriteLine("Item Info: " + item.Name + "-" + item.Url );
    i++;
    }
    }

    }

    }

    ReplyDelete